def __init__(self,
                 expt_dir,
                 covar="Matern52",
                 mcmc_iters=10,
                 pending_samples=100,
                 noiseless=False,
                 burnin=100,
                 grid_subset=20):
        self.cov_func = getattr(gp, covar)
        self.locker = Locker()
        self.state_pkl = os.path.join(expt_dir, self.__module__ + ".pkl")
        self.stats_file = os.path.join(
            expt_dir, self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters = int(mcmc_iters)
        self.burnin = int(burnin)
        self.needs_burnin = True
        self.pending_samples = int(pending_samples)
        self.D = -1
        self.hyper_iters = 1
        # Number of points to optimize EI over
        self.grid_subset = int(grid_subset)
        self.noiseless = bool(int(noiseless))
        self.hyper_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale = 1  # zero-mean log normal prior
        self.max_ls = 10  # top-hat prior on length scales
        self.prior = False
        self.prior_mean = None
        self.prior_cov = None
        self.expt_grid = None
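Note the bool(int(noiseless)) idiom in the constructor: chooser options typically arrive as strings from the experiment config, and a bare bool() cast would treat any non-empty string, including "0", as true. A standalone check:

# Options arrive as strings; bool(int(...)) parses flags correctly.
assert bool("0") is True         # the naive cast gets "0" wrong
assert bool(int("0")) is False   # the idiom used above
assert bool(int("1")) is True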
Example 2
    def __init__(self,
                 expt_dir,
                 covar="Matern52",
                 mcmc_iters=10,
                 pending_samples=100,
                 noiseless=False,
                 burnin=100,
                 grid_subset=20,
                 use_multiprocessing=True):
        self.cov_func = getattr(gp, covar)
        self.locker = Locker()
        self.state_pkl = os.path.join(expt_dir, self.__module__ + ".pkl")
        self.stats_file = os.path.join(
            expt_dir, self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters = int(mcmc_iters)
        self.burnin = int(burnin)
        self.needs_burnin = True
        self.pending_samples = int(pending_samples)
        self.D = -1
        self.hyper_iters = 1
        # Number of points to optimize EI over
        self.grid_subset = int(grid_subset)
        self.noiseless = bool(int(noiseless))
        self.hyper_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale = 1  # zero-mean log normal prior
        self.max_ls = 2  # top-hat prior on length scales
        self.sample_points = 4
        self.samples_per_point = 3
        self.sample_from = 10

        # If multiprocessing fails or deadlocks, set this to False
        self.use_multiprocessing = bool(int(use_multiprocessing))
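The use_multiprocessing flag supports a pattern like the sketch below (run_serial_or_parallel is a hypothetical helper, not part of Spearmint; the real chooser dispatches its optimize_pt calls through a multiprocessing.Pool, as in Example 15 further down):

import multiprocessing

def run_serial_or_parallel(worker, tasks, use_multiprocessing=True, procs=4):
    if use_multiprocessing:
        # Parallel path: one async task per candidate.
        pool = multiprocessing.Pool(procs)
        try:
            return [r.get() for r in
                    [pool.apply_async(worker, (t,)) for t in tasks]]
        finally:
            pool.close()
    # Serial fallback for environments where fork-based pools deadlock.
    return [worker(t) for t in tasks]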
Example 3
    def __init__(self, expt_dir, variables=None, grid_size=None, grid_seed=1):
        self.expt_dir = expt_dir
        self.jobs_pkl = os.path.join(expt_dir, 'expt-grid.pkl')
        self.locker   = Locker()

        # Only one process at a time is allowed to have access to this.
        sys.stderr.write("Waiting to lock grid...")
        self.locker.lock_wait(self.jobs_pkl)
        sys.stderr.write("...acquired\n")

        # Does this exist already?
        if variables is not None and not os.path.exists(self.jobs_pkl):

            # Set up the grid for the first time.
            self.seed = grid_seed
            self.vmap   = GridMap(variables, grid_size)
            self.grid   = self.hypercube_grid(self.vmap.card(), grid_size)
            self.status = np.zeros(grid_size, dtype=int) + CANDIDATE_STATE
            self.values = np.zeros(grid_size) + np.nan
            self.durs   = np.zeros(grid_size) + np.nan
            self.sgeids = np.zeros(grid_size, dtype=int)

            # Save this out.
            self._save_jobs()
        else:

            # Load in from the pickle.
            self._load_jobs()
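The arrays above act as a table keyed by grid index. A minimal illustration of the bookkeeping (the state constants here are assumed values; Spearmint defines its own):

import numpy as np

CANDIDATE_STATE = 0  # assumed encoding
COMPLETE_STATE = 3   # assumed encoding

grid_size = 4
status = np.zeros(grid_size, dtype=int) + CANDIDATE_STATE
values = np.zeros(grid_size) + np.nan
durs = np.zeros(grid_size) + np.nan

# Completing job 2 records its objective value and duration.
status[2] = COMPLETE_STATE
values[2] = 0.173
durs[2] = 41.5
print status, values, durs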
Example 4
def job_submit(name, output_file, job_file, working_dir):

    cmd = ('''python spearmint_sync.py --wrapper "%s" > %s''' % 
           (job_file, output_file))
    output_file = open(output_file, 'w')

    # Submit the job.
    locker = Locker()
    locker.unlock(working_dir + '/expt-grid.pkl')
    process = subprocess.Popen(cmd,
                               stdout=output_file,
                               stderr=output_file, shell=True)

    return process
def job_submit(name, output_file, job_file, working_dir):
    spearmint_path = os.path.realpath(__file__)
    cmd = ('python ' + spearmint_path + ' --wrapper "%s" > %s' % 
           (job_file, output_file))
    output_file = open(output_file, 'w')

    # Submit the job.
    locker = Locker()
    locker.unlock(working_dir + '/expt-grid.pkl')
    process = subprocess.Popen(cmd,
                               stdout=output_file,
                               stderr=output_file, shell=True)

    return process
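Hypothetical usage of the helper above (all paths are placeholders):

# Launch one wrapped job and wait for the shell that runs it.
proc = job_submit('job-0',
                  '/tmp/expt/job-0.out',  # placeholder output path
                  '/tmp/expt/job-0.pb',   # placeholder job file
                  '/tmp/expt')            # placeholder working dir
ret = proc.wait()  # returncode of the submitted shell command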
Example 6
    def __init__(self, expt_dir, covar="Matern52", mcmc_iters=10,
                 pending_samples=100, noiseless=False):
        self.cov_func        = getattr(gp, covar)
        self.locker          = Locker()
        self.state_pkl       = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.mcmc_iters      = int(mcmc_iters)
        self.pending_samples = pending_samples
        self.D               = -1
        self.hyper_iters     = 1
        self.noiseless       = bool(int(noiseless))

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale  = 1    # zero-mean log normal prior
        self.max_ls      = 2    # top-hat prior on length scales
Example 10
def job_submit(name, output_file, job_file, working_dir):
    if os.name == 'nt':
        cmd = ('''python spearmint_sync.py --wrapper "%s"''' % (job_file))
    else:
        cmd = ('''python spearmint_sync.py --wrapper "%s" > "%s"''' % 
               (job_file, output_file))
    output_file = open(output_file, 'w')

    # Submit the job.
    locker = Locker()
    locker.unlock(os.path.join(working_dir, 'expt-grid.pkllock'))
    process = subprocess.Popen(cmd,
                               stdout=output_file,
                               stderr=output_file, shell=True)

    return process
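Note that on the non-Windows path the output is captured twice: once by the shell redirect inside cmd and once by Popen's stdout=/stderr= handles. Popen's handles alone are sufficient, as this standalone sketch shows:

import subprocess

with open('/tmp/out.txt', 'w') as fh:  # placeholder path
    # No shell redirect needed; Popen routes both streams to fh.
    subprocess.Popen('echo hello', stdout=fh, stderr=fh,
                     shell=True).wait()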
Example 11
    def __init__(self, expt_dir, covar="Matern52", mcmc_iters=20, 
                 pending_samples=100, noiseless=False, burnin=100,
                 grid_subset=20, constraint_violating_value=np.inf,
                 visualize2D=False):
        self.cov_func        = getattr(gp, covar)
        self.locker          = Locker()
        self.state_pkl       = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.stats_file      = os.path.join(expt_dir, 
                                   self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters      = int(mcmc_iters)
        self.burnin          = int(burnin)
        self.needs_burnin    = True
        self.pending_samples = pending_samples
        self.D               = -1
        self.hyper_iters     = 1
        # Number of points to optimize EI over
        self.grid_subset     = int(grid_subset)
        self.noiseless       = bool(int(noiseless))
        self.hyper_samples   = []
        self.constraint_hyper_samples = []
        self.ff              = None
        self.ff_samples      = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale  = 1    # zero-mean log normal prior
        self.max_ls      = 2    # top-hat prior on length scales

        self.constraint_noise_scale = 0.1  # horseshoe prior
        self.constraint_amp2_scale  = 1    # zero-mean log normal prior
        self.constraint_gain        = 1    # gain on the constraint latent
        self.constraint_max_ls      = 2   # top-hat prior on length scales
        self.bad_value = float(constraint_violating_value)
        self.visualize2D            = visualize2D
Example 13
    def __init__(self, expt_dir, variables=None, grid_size=None, grid_seed=1):
        self.expt_dir = expt_dir
        self.jobs_pkl = os.path.join(expt_dir, EXPERIMENT_GRID_FILE)
        self.locker = Locker()

        # Only one process at a time is allowed to have access to the grid.
        self.locker.lock_wait(self.jobs_pkl)

        # Set up the grid for the first time if it doesn't exist.
        if variables is not None and not os.path.exists(self.jobs_pkl):
            self.seed = grid_seed
            self.vmap = GridMap(variables, grid_size)
            self.grid = self._hypercube_grid(self.vmap.card(), grid_size)
            self.status = np.zeros(grid_size, dtype=int) + CANDIDATE_STATE
            self.values = np.zeros(grid_size) + np.nan
            self.durs = np.zeros(grid_size) + np.nan
            self.proc_ids = np.zeros(grid_size, dtype=int)
            self._save_jobs()

        # Or load in the grid from the pickled file.
        else:
            self._load_jobs()
Example 15
class GPEIOptChooser:
    def __init__(self,
                 expt_dir,
                 covar="Matern52",
                 mcmc_iters=10,
                 pending_samples=100,
                 noiseless=False,
                 burnin=100,
                 grid_subset=20):
        self.cov_func = getattr(gp, covar)
        self.locker = Locker()
        self.state_pkl = os.path.join(expt_dir, self.__module__ + ".pkl")
        self.stats_file = os.path.join(
            expt_dir, self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters = int(mcmc_iters)
        self.burnin = int(burnin)
        self.needs_burnin = True
        self.pending_samples = int(pending_samples)
        self.D = -1
        self.hyper_iters = 1
        # Number of points to optimize EI over
        self.grid_subset = int(grid_subset)
        self.noiseless = bool(int(noiseless))
        self.hyper_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale = 1  # zero-mean log normal prior
        self.max_ls = 10  # top-hat prior on length scales
        self.prior = False
        self.prior_mean = None
        self.prior_cov = None
        self.expt_grid = None

    def setPrior(self, prior):
        self.prior = prior
        self.prior_mean = np.array([0.4, 1, 75])
        self.prior_cov = np.array([[6, 0.5, 0.5], [0.5, 1.1, -0.5],
                                   [0.5, -0.5, 6]])
        #np.array([[1.1,0.5,0.5],[0.5,1.1,-0.5],[0.5,-0.5,1.1]])

    def dump_hypers(self):
        sys.stderr.write("Waiting to lock hyperparameter pickle...")
        #sys.stderr.write("GPEIOptChooser Before acquiring lock; ABHIMANU")
        self.locker.lock_wait(self.state_pkl)
        #sys.stderr.write("GPEIOptChooser After acquiring lock; ABHIMANU")
        sys.stderr.write("...acquired\n")

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump(
            {
                'dims': self.D,
                'ls': self.ls,
                'amp2': self.amp2,
                'noise': self.noise,
                'mean': self.mean
            }, fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd)  # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

        # Write the hyperparameters out to a human readable file as well
        fh = open(self.stats_file, 'w')
        fh.write('Mean Noise Amplitude <length scales>\n')
        fh.write('-----------ALL SAMPLES-------------\n')
        meanhyps = 0 * np.hstack(self.hyper_samples[0])
        for i in self.hyper_samples:
            hyps = np.hstack(i)
            meanhyps += (1 / float(len(self.hyper_samples))) * hyps
            for j in hyps:
                fh.write(str(j) + ' ')
            fh.write('\n')

        fh.write('-----------MEAN OF SAMPLES-------------\n')
        for j in meanhyps:
            fh.write(str(j) + ' ')
        fh.write('\n')
        fh.close()
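    # Note on the atomic move above: "mv" is atomic only when the temp file
    # and the target share a filesystem, and its exit status is ignored
    # (the TODO). A sketch with both points addressed, assuming the temp
    # file is created next to the target:
    #
    #   fh = tempfile.NamedTemporaryFile(
    #       mode='w', delete=False, dir=os.path.dirname(self.state_pkl))
    #   cPickle.dump(state, fh)
    #   fh.close()
    #   os.rename(fh.name, self.state_pkl)  # atomic; raises on failure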

    def _real_init(self, dims, values):
        sys.stderr.write("Waiting to lock hyperparameter pickle...")
        self.locker.lock_wait(self.state_pkl)
        sys.stderr.write("...acquired\n")

        self.randomstate = npr.get_state()
        if os.path.exists(self.state_pkl):
            fh = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D = state['dims']
            self.ls = state['ls']
            self.amp2 = state['amp2']
            self.noise = state['noise']
            self.mean = state['mean']

            self.needs_burnin = False
        else:

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values)

            # Initial observation noise.
            self.noise = 1e-3

            # Initial mean.
            self.mean = np.mean(values)

            # Save hyperparameter samples
            self.hyper_samples.append(
                (self.mean, self.noise, self.amp2, self.ls))

        self.locker.unlock(self.state_pkl)

    def cov(self, x1, x2=None):
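        # The 1e-6 * I term is jitter: it keeps the covariance matrix
        # positive definite so downstream Cholesky factorizations succeed.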
        if x2 is None:
            return self.amp2 * (self.cov_func(self.ls, x1, None) +
                                1e-6 * np.eye(x1.shape[0]))
        else:
            return self.amp2 * self.cov_func(self.ls, x1, x2)

    def set_expt_grid(self, expt_grid):
        self.expt_grid = expt_grid

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values', pick the next experiment to run
    # according to the acquisition function.
    def next(self, grid, values, durations, candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete])

        # Grab out the relevant sets.
        comp = grid[complete, :]
        cand = grid[candidates, :]
        pend = grid[pending, :]
        vals = values[complete]
        numcand = cand.shape[0]

        # Spray a set of candidates around the min so far
        best_comp = np.argmin(vals)
        cand2 = np.vstack(
            (np.random.randn(10, comp.shape[1]) * 0.001 + comp[best_comp, :],
             cand))

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in xrange(self.burnin):
                    self.sample_hypers(comp, vals)
                    sys.stderr.write(
                        "BURN %d/%d] mean: %.2f  amp: %.2f "
                        "noise: %.4f  min_ls: %.4f  max_ls: %.4f\n" %
                        (mcmc_iter + 1, self.burnin, self.mean,
                         np.sqrt(self.amp2), self.noise, np.min(
                             self.ls), np.max(self.ls)))
                self.needs_burnin = False

            # Sample from hyperparameters.
            # Adjust the candidates to hit ei peaks
            self.hyper_samples = []
            for mcmc_iter in xrange(self.mcmc_iters):
                self.sample_hypers(comp, vals)
                sys.stderr.write("%d/%d] mean: %.2f  amp: %.2f  noise: %.4f "
                                 "min_ls: %.4f  max_ls: %.4f\n" %
                                 (mcmc_iter + 1, self.mcmc_iters, self.mean,
                                  np.sqrt(self.amp2), self.noise,
                                  np.min(self.ls), np.max(self.ls)))
            self.dump_hypers()

            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            overall_ei = self.ei_over_hypers(comp, pend, cand2, vals)
            inds = np.argsort(np.mean(overall_ei, axis=1))[-self.grid_subset:]
            #print "top 20 EI abhimanu: ", overall_ei[inds, :]
            cand2 = cand2[inds, :]

            # Serial fallback: optimize each candidate in a loop. Uncomment
            # and use instead of the pool below if multiprocessing fails.
            #for i in xrange(0, cand2.shape[0]):
            #    sys.stderr.write("Optimizing candidate %d/%d\n" %
            #                     (i+1, cand2.shape[0]))
            #    ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
            #                            cand2[i,:].flatten(), args=(comp,pend,vals),
            #                            bounds=b, disp=0)
            #    cand2[i,:] = ret[0]
            #cand = np.vstack((cand, cand2))

            # Optimize each point in parallel
            pool = multiprocessing.Pool(self.grid_subset)
            results = [
                pool.apply_async(optimize_pt,
                                 args=(c, b, comp, pend, vals,
                                       copy.copy(self))) for c in cand2
            ]
            for res in results:
                cand = np.vstack((cand, res.get(1e8)))
            pool.close()

            overall_ei = self.ei_over_hypers(comp, pend, cand, vals)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))
            #print "Best EI abhimanu: ", overall_ei[best_cand, :]

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand, :])

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            self.optimize_hypers(comp, vals)

            sys.stderr.write(
                "mean: %.2f  amp: %.2f  noise: %.4f  "
                "min_ls: %.4f  max_ls: %.4f\n" % (self.mean, np.sqrt(
                    self.amp2), self.noise, np.min(self.ls), np.max(self.ls)))

            # Optimize over EI
            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            for i in xrange(0, cand2.shape[0]):
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei,
                                        cand2[i, :].flatten(),
                                        args=(comp, vals, True),
                                        bounds=b,
                                        disp=0)
                cand2[i, :] = ret[0]
            cand = np.vstack((cand, cand2))

            ei = self.compute_ei(comp, pend, cand, vals)
            best_cand = np.argmax(ei)

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand, :])

            return int(candidates[best_cand])

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self, comp, pend, cand, vals):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]
            overall_ei[:, mcmc_iter] = self.compute_ei(comp, pend, cand, vals)
        return overall_ei

    def check_grad_ei(self, cand, comp, pend, vals):
        (ei, dx1) = self.grad_optimize_ei_over_hypers(cand, comp, pend, vals)
        dx2 = dx1 * 0
        idx = np.zeros(cand.shape[0])
        for i in xrange(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1,
             tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, pend,
                                                      vals)
            (ei2,
             tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, pend,
                                                      vals)
            dx2[i] = (ei1 - ei2) / (2 * 1e-6)
            idx[i] = 0
        print 'computed grads', dx1
        print 'finite diffs', dx2
        print(dx1 / dx2)
        print np.sum((dx1 - dx2)**2)
        time.sleep(2)

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self,
                                     cand,
                                     comp,
                                     pend,
                                     vals,
                                     compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()
        ls = self.ls.copy()
        amp2 = self.amp2
        mean = self.mean
        noise = self.noise

        for hyper in self.hyper_samples:
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]
            if compute_grad:
                (ei, g_ei) = self.grad_optimize_ei(cand, comp, pend, vals,
                                                   compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand, comp, pend, vals,
                                           compute_grad)
            summed_ei += ei

        self.mean = mean
        self.amp2 = amp2
        self.noise = noise
        self.ls = ls.copy()

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    # Adjust points based on optimizing their ei
    def grad_optimize_ei(self, cand, comp, pend, vals, compute_grad=True):
        if pend.shape[0] == 0:
            best = np.min(vals)
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp, cand)

            # Predictive things.
            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            if not compute_grad:
                return ei

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5 * npdf / func_s

            # Apply covariance function
            grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(), grad_cross)
            grad_xp_v = np.dot(
                -2 * spla.cho_solve((obsv_chol, True), cand_cross).transpose(),
                grad_cross)

            grad_xp = 0.5 * self.amp2 * (grad_xp_m * g_ei_m +
                                         grad_xp_v * g_ei_s2)
            ei = -np.sum(ei)

            return ei, grad_xp.flatten()

        else:
            # If there are pending experiments, fantasize their outcomes.
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = (self.cov(comp_pend) +
                             self.noise * np.eye(comp_pend.shape[0]))
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            npr.set_state(self.randomstate)
            # Center fantasies at the predictive mean of the pending points.
            pend_fant = np.dot(pend_chol,
                               npr.randn(pend.shape[0],
                                         self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)
            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5 * npdf / func_s

            # Apply covariance function
            grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(), grad_cross)
            grad_xp_v = np.dot(
                -2 * spla.cho_solve(
                    (comp_pend_chol, True), cand_cross).transpose(),
                grad_cross)

            grad_xp = 0.5 * self.amp2 * (
                grad_xp_m * np.tile(g_ei_m, (comp.shape[1], 1)).T +
                (grad_xp_v.T * g_ei_s2).T)
            ei = -np.mean(ei, axis=1)
            grad_xp = np.mean(grad_xp, axis=0)

            return ei, grad_xp.flatten()

    def compute_ei(self, comp, pend, cand, vals):
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            if self.prior:
                # Weight EI by the prior density, evaluated at the
                # candidates mapped back to the original parameter space.
                cand_orig = np.zeros((cand.shape[0], cand.shape[1]))
                for i in xrange(0, cand.shape[0]):
                    cand_orig[
                        i, :] = self.expt_grid.vmap.get_datapoint_original(
                            cand[i, :])

                p = mvn.pdf(cand_orig, self.prior_mean, self.prior_cov)
                ei = p * ei

            return ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = (self.cov(comp_pend) +
                             self.noise * np.eye(comp_pend.shape[0]))
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            npr.set_state(self.randomstate)
            # Center fantasies at the predictive mean of the pending points.
            pend_fant = np.dot(pend_chol,
                               npr.randn(pend.shape[0],
                                         self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return np.mean(ei, axis=1)
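    # Expected improvement used above, in closed form for minimization:
    #     u = (best - mu(x)) / sigma(x)
    #     EI(x) = sigma(x) * (u * Phi(u) + phi(u))
    # with Phi/phi the standard normal CDF/PDF. With pending jobs, EI is
    # averaged over fantasy outcomes drawn from the GP posterior.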

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov = (
                self.amp2 *
                (self.cov_func(ls, comp, None) + 1e-6 * np.eye(comp.shape[0]))
                + self.noise * np.eye(comp.shape[0]))
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp = (-np.sum(np.log(np.diag(chol))) -
                  0.5 * np.dot(vals - self.mean, solve))
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)
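        # compwise=True slice-samples one length scale at a time, matching
        # the independent top-hat prior enforced by logprob above.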

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov = (amp2 * (self.cov_func(self.ls, comp, None) +
                           1e-6 * np.eye(comp.shape[0])) +
                   noise * np.eye(comp.shape[0]))
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale / noise)**2))

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale)**2
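            # Together these two terms implement the priors declared in
            # __init__: p(noise) proportional to log(1 + (noise_scale /
            # noise)**2), a horseshoe-like density, and a zero-mean
            # lognormal on amp2 with scale amp2_scale.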

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = hypers[2]

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf

            cov = (amp2 * (self.cov_func(self.ls, comp, None) +
                           1e-6 * np.eye(comp.shape[0])) +
                   noise * np.eye(comp.shape[0]))
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = 1e-3

    def optimize_hypers(self, comp, vals):
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp, vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Save hyperparameter samples
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.dump_hypers()

        return
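As a sanity check on the closed-form EI used throughout this class, here is a self-contained numeric sketch (standalone; not part of the Spearmint sources):

import numpy as np
import scipy.stats as sps

def expected_improvement(mu, var, best):
    # EI for minimization: s * (u * Phi(u) + phi(u)), u = (best - mu) / s
    s = np.sqrt(var)
    u = (best - mu) / s
    return s * (u * sps.norm.cdf(u) + sps.norm.pdf(u))

# EI grows as the predictive mean drops below the incumbent best of 0.0:
print expected_improvement(np.array([0.5, 0.0, -0.5]),
                           np.array([0.1, 0.1, 0.1]), 0.0)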
Example 17
class GPEIperSecChooser:

    def __init__(self, expt_dir, covar="Matern52", mcmc_iters=10,
                 pending_samples=100, noiseless=False, burnin=100,
                 grid_subset=20):
        self.cov_func        = getattr(gp, covar)
        self.locker          = Locker()
        self.state_pkl       = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.stats_file      = os.path.join(expt_dir,
                                   self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters      = int(mcmc_iters)
        self.burnin          = int(burnin)
        self.needs_burnin    = True
        self.pending_samples = pending_samples
        self.D               = -1
        self.hyper_iters     = 1
        # Number of points to optimize EI over
        self.grid_subset     = int(grid_subset)
        self.noiseless       = bool(int(noiseless))
        self.hyper_samples = []
        self.time_hyper_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale  = 1    # zero-mean log normal prior
        self.max_ls      = 10    # top-hat prior on length scales

        self.time_noise_scale = 0.1  # horseshoe prior
        self.time_amp2_scale  = 1    # zero-mean log normal prior
        self.time_max_ls      = 10   # top-hat prior on length scales

    # A simple function to dump out hyperparameters to allow for a hot start
    # if the optimization is restarted.
    def dump_hypers(self):
        self.locker.lock_wait(self.state_pkl)

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'dims'        : self.D,
                       'ls'          : self.ls,
                       'amp2'        : self.amp2,
                       'noise'       : self.noise,
                       'mean'        : self.mean,
                       'time_ls'     : self.time_ls,
                       'time_amp2'   : self.time_amp2,
                       'time_noise'  : self.time_noise,
                       'time_mean'   : self.time_mean },
                     fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd) # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

    def _real_init(self, dims, values, durations):
        self.locker.lock_wait(self.state_pkl)

        if os.path.exists(self.state_pkl):
            fh    = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D          = state['dims']
            self.ls         = state['ls']
            self.amp2       = state['amp2']
            self.noise      = state['noise']
            self.mean       = state['mean']
            self.time_ls    = state['time_ls']
            self.time_amp2  = state['time_amp2']
            self.time_noise = state['time_noise']
            self.time_mean  = state['time_mean']
        else:

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)
            self.time_ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values)+1e-4
            self.time_amp2 = np.std(durations)+1e-4

            # Initial observation noise.
            self.noise = 1e-3
            self.time_noise = 1e-3

            # Initial mean.
            self.mean = np.mean(values)
            self.time_mean = np.mean(np.log(durations))

        self.locker.unlock(self.state_pkl)

    def cov(self, amp2, ls, x1, x2=None):
        if x2 is None:
            return amp2 * (self.cov_func(ls, x1, None)
                           + 1e-6*np.eye(x1.shape[0]))
        else:
            return amp2 * self.cov_func(ls, x1, x2)

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values', pick the next experiment to run
    # according to the acquisition function.
    def next(self, grid, values, durations,
             candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete],
                            durations[complete])

        # Grab out the relevant sets.
        comp = grid[complete,:]
        cand = grid[candidates,:]
        pend = grid[pending,:]
        vals = values[complete]
        durs = durations[complete]

        # Bring time into the log domain before we do anything
        # to maintain strict positivity
        durs = np.log(durs)

        # Spray a set of candidates around the min so far
        numcand = cand.shape[0]
        best_comp = np.argmin(vals)
        cand2 = np.vstack((np.random.randn(10,comp.shape[1])*0.001 +
                           comp[best_comp,:], cand))

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in xrange(self.burnin):
                    self.sample_hypers(comp, vals, durs)
                    log("BURN %d/%d] mean: %.2f  amp: %.2f "
                                     "noise: %.4f  min_ls: %.4f  max_ls: %.4f"
                                     % (mcmc_iter+1, self.burnin, self.mean,
                                        np.sqrt(self.amp2), self.noise,
                                        np.min(self.ls), np.max(self.ls)))
                self.needs_burnin = False

            # Sample from hyperparameters.
            # Adjust the candidates to hit ei/sec peaks
            self.hyper_samples = []
            for mcmc_iter in xrange(self.mcmc_iters):
                self.sample_hypers(comp, vals, durs)
                log("%d/%d] mean: %.2f  amp: %.2f  noise: %.4f "
                                 "min_ls: %.4f  max_ls: %.4f"
                                 % (mcmc_iter+1, self.mcmc_iters, self.mean,
                                    np.sqrt(self.amp2), self.noise,
                                    np.min(self.ls), np.max(self.ls)))

                log("%d/%d] time_mean: %.2fs time_amp: %.2f  time_noise: %.4f "
                                 "time_min_ls: %.4f  time_max_ls: %.4f"
                                 % (mcmc_iter+1, self.mcmc_iters, np.exp(self.time_mean),
                                    np.sqrt(self.time_amp2), np.exp(self.time_noise),
                                    np.min(self.time_ls), np.max(self.time_ls)))
            self.dump_hypers()

            # Pick the top candidates to optimize over
            overall_ei = self.ei_over_hypers(comp,pend,cand2,vals,durs)
            inds = np.argsort(np.mean(overall_ei, axis=1))[-self.grid_subset:]
            cand2 = cand2[inds,:]

            # Adjust the candidates to hit ei peaks
            b = []# optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            for i in xrange(0, cand2.shape[0]):
                log("Optimizing candidate %d/%d" %
                                 (i+1, cand2.shape[0]))
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
                                        cand2[i,:].flatten(),
                                        args=(comp,vals,durs,True),
                                        bounds=b, disp=0)
                cand2[i,:] = ret[0]

            cand = np.vstack((cand, cand2))

            overall_ei = self.ei_over_hypers(comp,pend,cand,vals,durs)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))
            self.dump_hypers()
            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            self.optimize_hypers(comp, vals, durs)

            log("mean: %f  amp: %f  noise: %f "
                             "min_ls: %f  max_ls: %f"
                             % (self.mean, np.sqrt(self.amp2),
                                self.noise, np.min(self.ls), np.max(self.ls)))

            # Pick the top candidates to optimize over
            ei = self.compute_ei_per_s(comp, pend, cand2, vals, durs)
            inds = np.argsort(ei)[-self.grid_subset:]
            cand2 = cand2[inds,:]

            # Adjust the candidates to hit ei peaks
            b = []# optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            for i in xrange(0, cand2.shape[0]):
                log("Optimizing candidate %d/%d" %
                                 (i+1, cand2.shape[0]))
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei,
                                        cand2[i,:].flatten(),
                                        args=(comp,vals,durs,True),
                                        bounds=b, disp=0)
                cand2[i,:] = ret[0]

            cand = np.vstack((cand, cand2))
            ei = self.compute_ei_per_s(comp, pend, cand, vals, durs)

            best_cand = np.argmax(ei)
            self.dump_hypers()

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self,comp,pend,cand,vals,durs):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            time_hyper = self.time_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.time_mean = time_hyper[0]
            self.time_noise = time_hyper[1]
            self.time_amp2 = time_hyper[2]
            self.time_ls = time_hyper[3]

            overall_ei[:,mcmc_iter] = self.compute_ei_per_s(comp, pend, cand,
                                                            vals, durs.squeeze())

        return overall_ei

    def check_grad_ei_per(self, cand, comp, vals, durs):
        (ei,dx1) = self.grad_optimize_ei_over_hypers(cand, comp, vals, durs)
        dx2 = dx1*0
        idx = np.zeros(cand.shape[0])
        for i in xrange(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1,tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, vals, durs)
            (ei2,tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, vals, durs)
            dx2[i] = (ei1 - ei2)/(2*1e-6)
            idx[i] = 0
        print 'computed grads', dx1
        print 'finite diffs', dx2
        print (dx1/dx2)
        print np.sum((dx1 - dx2)**2)
        time.sleep(2)

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self, cand, comp, vals, durs, compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()

        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            time_hyper = self.time_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.time_mean = time_hyper[0]
            self.time_noise = time_hyper[1]
            self.time_amp2 = time_hyper[2]
            self.time_ls = time_hyper[3]

            if compute_grad:
                (ei,g_ei) = self.grad_optimize_ei(cand,comp,vals,durs,compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand,comp,vals,durs,compute_grad)

            summed_ei += ei

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    def grad_optimize_ei(self, cand, comp, vals, durs, compute_grad=True):
        # Here we compute the gradient of EI per second, which means
        # differentiating through both kernels: the one predicting
        # duration and the one predicting the objective.
        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))

        # First we make predictions for the durations
        # Compute covariances
        comp_time_cov   = self.cov(self.time_amp2, self.time_ls, comp)
        cand_time_cross = self.cov(self.time_amp2, self.time_ls,comp,cand)

        # Cholesky decompositions
        obsv_time_cov  = comp_time_cov + self.time_noise*np.eye(comp.shape[0])
        obsv_time_chol = spla.cholesky( obsv_time_cov, lower=True )

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_time_chol, True), durs - self.time_mean)

        # Predict marginal mean times and (possibly) variances
        func_time_m = np.dot(cand_time_cross.T, t_alpha) + self.time_mean

        # We don't really need the time variances now
        #func_time_v = self.time_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Bring time out of the log domain
        func_time_m = np.exp(func_time_m)

        # Compute derivative of cross-distances.
        grad_cross_r = gp.grad_dist2(self.time_ls, comp, cand)

        # Apply covariance function
        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.time_ls, comp, cand)
        grad_cross_t = np.squeeze(cand_cross_grad)

        # Now compute the gradients w.r.t. ei
        # The primary covariances for prediction.
        comp_cov   = self.cov(self.amp2, self.ls, comp)
        cand_cross = self.cov(self.amp2, self.ls, comp, cand)

        # Compute the required Cholesky.
        obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
        obsv_chol = spla.cholesky( obsv_cov, lower=True )

        cand_cross_grad = cov_grad_func(self.ls, comp, cand)

        # Predictive things.
        # Solve the linear systems.
        alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
        beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

        # Expected improvement
        func_s = np.sqrt(func_v)
        u      = (best - func_m) / func_s
        ncdf   = sps.norm.cdf(u)
        npdf   = sps.norm.pdf(u)
        ei     = func_s*(u*ncdf + npdf)

        ei_per_s = -np.sum(ei/func_time_m)
        if not compute_grad:
            return ei_per_s

        grad_time_xp_m = np.dot(t_alpha.transpose(),grad_cross_t)

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5*npdf / func_s

        # Apply covariance function
        grad_cross = np.squeeze(cand_cross_grad)

        grad_xp_m = np.dot(alpha.transpose(),grad_cross)
        grad_xp_v = np.dot(-2*spla.cho_solve((obsv_chol, True),
                                             cand_cross).transpose(),grad_cross)

        grad_xp = 0.5*self.amp2*(grad_xp_m*g_ei_m + grad_xp_v*g_ei_s2)
        grad_time_xp_m = 0.5*self.time_amp2*grad_time_xp_m*func_time_m
        grad_xp = (func_time_m*grad_xp - ei*grad_time_xp_m)/(func_time_m**2)
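        # Quotient rule for d(EI / t): (t * dEI - EI * dt) / t**2, with
        # t = func_time_m the predicted duration.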

        return ei_per_s, grad_xp.flatten()

    def compute_ei_per_s(self, comp, pend, cand, vals, durs):
        # First we make predictions for the durations as that
        # doesn't depend on pending experiments

        # Compute covariances
        comp_time_cov   = self.cov(self.time_amp2, self.time_ls, comp)
        cand_time_cross = self.cov(self.time_amp2, self.time_ls,comp,cand)

        # Cholesky decompositions
        obsv_time_cov  = comp_time_cov + self.time_noise*np.eye(comp.shape[0])
        obsv_time_chol = spla.cholesky( obsv_time_cov, lower=True )

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_time_chol, True), durs - self.time_mean)
        #t_beta   = spla.solve_triangular(obsv_time_chol, cand_time_cross, lower=True)

        # Predict marginal mean times and (possibly) variances
        func_time_m = np.dot(cand_time_cross.T, t_alpha) + self.time_mean
        # We don't really need the time variances now
        #func_time_v = self.time_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Bring time out of the log domain
        func_time_m = np.exp(func_time_m)

        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov   = self.cov(self.amp2, self.ls, comp)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)

            # Compute the required Cholesky.
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_chol = spla.cholesky( obsv_cov, lower=True )

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u      = (best - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            ei_per_s = ei/func_time_m
            return ei_per_s
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov  = self.cov(self.amp2, self.ls, comp_pend) + self.noise*np.eye(comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(self.amp2, self.ls, comp, pend)
            pend_kappa = self.cov(self.amp2, self.ls, pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0],:comp.shape[0]]

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)
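            # pend_K is the Schur complement K_pp - K_cp^T K_cc^{-1} K_cp,
            # i.e. the GP posterior covariance over the pending points.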

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = np.dot(pend_chol, npr.randn(pend.shape[0],self.pending_samples)) + pend_m[:,None]
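            # Each column of pend_fant is one joint posterior draw (a
            # "fantasy") of the outcomes of all pending experiments.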

            # Include the fantasies.
            fant_vals = np.concatenate((np.tile(vals[:,np.newaxis],
                                                (1,self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(self.amp2, self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha  = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean)
            beta   = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:,np.newaxis])
            u      = (bests[np.newaxis,:] - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)
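            # Averaging EI over the fantasy columns below is a Monte Carlo
            # estimate of EI marginalized over the pending outcomes; dividing
            # by the predicted duration yields EI per second.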

            return np.divide(np.mean(ei, axis=1), func_time_m)

    def sample_hypers(self, comp, vals, durs):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)

        self._sample_time_noisy(comp, durs.squeeze())
        self._sample_time_ls(comp, durs.squeeze())

        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.time_hyper_samples.append((self.time_mean, self.time_noise, self.time_amp2,
                                        self.time_ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov   = self.amp2 * (self.cov_func(ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
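            # GP log marginal likelihood up to an additive constant; the
            # top-hat prior on the length scales is the rejection above.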
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-self.mean, solve)
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_time_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.time_max_ls):
                return -np.inf

            cov   = self.time_amp2 * (self.cov_func(ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.time_noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.time_mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-self.time_mean, solve)
            return lp

        self.time_ls = util.slice_sample(self.time_ls, logprob, compwise=True)

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale/noise)**2))
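            # The horseshoe density has no closed form, but up to a constant
            # it is well approximated by log(1 + (scale/x)**2); hence the
            # nested logs above.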
            #lp -= 0.5*(np.log(noise)/self.noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = hypers[2]

    def _sample_time_noisy(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.time_ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.time_noise_scale/noise)**2))
            #lp -= 0.5*(np.log(noise)/self.time_noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(np.sqrt(amp2))/self.time_amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.time_mean, self.time_amp2, self.time_noise]), logprob, compwise=False)
        self.time_mean  = hypers[0]
        self.time_amp2  = hypers[1]
        self.time_noise = hypers[2]

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = 1e-3

    def optimize_hypers(self, comp, vals, durs):
        # First the GP to observations
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp,vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Now the GP to times
        timegp = gp.GP(self.cov_func.__name__)
        timegp.real_init(comp.shape[1], durs)
        timegp.optimize_hypers(comp, durs)
        self.time_mean  = timegp.mean
        self.time_amp2  = timegp.amp2
        self.time_noise = timegp.noise
        self.time_ls    = timegp.ls

        # Save hyperparameter samples
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.time_hyper_samples.append((self.time_mean, self.time_noise, self.time_amp2,
                                        self.time_ls))
        self.dump_hypers()
Example 18
    def __init__(self,
                 expt_dir,
                 expt_name,
                 max_wallclock_time=sys.float_info.max,
                 title=None,
                 folds=1):
        self.expt_dir = expt_dir

        if folds < 1:
            folds = 1

        self.jobs_pkl = os.path.join(expt_dir, expt_name + ".pkl")
        self.locker = Locker.Locker()

        # Only one process at a time is allowed to have access to this.
        sys.stderr.write("Waiting to lock experiments file " + self.jobs_pkl +
                         "...")
        self.locker.lock_wait(self.jobs_pkl)
        sys.stderr.write("...acquired\n")

        # Does this exist already?
        if not os.path.exists(self.jobs_pkl):

            # Set up the experiments file for the first time
            # General information
            # TODO: Unfortunately, this is also the optimizer name
            self.experiment_name = expt_name
            self.title = title
            self.optimizer = None
            self.folds = folds
            self.instance_order = []

            self.trials = []

            # Time information
            # Wallclock_time used for the functions (should be the sum of all
            # instance_durations)
            self.total_wallclock_time = 0
            # The maximal allowed wallclock time
            self.max_wallclock_time = max_wallclock_time
            # Time when wrapping.py kicks off the optimizer
            self.starttime = []
            # Time when the focus is passed back to the optimizer
            self.endtime = []
            # Recorded every time cv.py is called; used to calculate the
            # optimizer time
            self.cv_starttime = []
            # Recorded when cv.py exits; also used to calculate the optimizer
            # time. cv_starttime and cv_endtime alternate when
            # runsolver_wrapper is called by SMAC
            self.cv_endtime = []
            # Dummy field, this will be calculated by wrapping.py after
            # everything is finished
            self.optimizer_time = []

            # Save this out.
            self._save_jobs()

        else:
            # Load in from the pickle.
            self._load_jobs()
Example 19
class GPEIConstrainedChooser:
    def __init__(self,
                 expt_dir,
                 covar="Matern52",
                 mcmc_iters=10,
                 pending_samples=100,
                 noiseless=False,
                 burnin=100,
                 grid_subset=20,
                 constraint_violating_value=-1):
        self.cov_func = getattr(gp, covar)
        self.locker = Locker()
        self.state_pkl = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.stats_file = os.path.join(
            expt_dir, self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters = int(mcmc_iters)
        self.burnin = int(burnin)
        self.needs_burnin = True
        self.pending_samples = pending_samples
        self.D = -1
        self.hyper_iters = 1
        # Number of points to optimize EI over
        self.grid_subset = int(grid_subset)
        self.noiseless = bool(int(noiseless))
        self.hyper_samples = []
        self.constraint_hyper_samples = []
        self.ff = None
        self.ff_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale = 1  # zero-mean log normal prior
        self.max_ls = 2  # top-hat prior on length scales

        self.constraint_noise_scale = 0.1  # horseshoe prior
        self.constraint_amp2_scale = 1  # zero-mean log normal prior
        self.constraint_gain = 1  # gain of the logistic link for constraints
        self.constraint_max_ls = 2  # top-hat prior on length scales
        self.bad_value = float(constraint_violating_value)

    # A simple function to dump out hyperparameters to allow for a hot start
    # if the optimization is restarted.
    def dump_hypers(self):
        sys.stderr.write("Waiting to lock hyperparameter pickle...")
        self.locker.lock_wait(self.state_pkl)
        sys.stderr.write("...acquired\n")

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump(
            {
                'dims': self.D,
                'ls': self.ls,
                'amp2': self.amp2,
                'noise': self.noise,
                'mean': self.mean,
                'constraint_ls': self.constraint_ls,
                'constraint_amp2': self.constraint_amp2,
                'constraint_noise': self.constraint_noise,
                'constraint_mean': self.constraint_mean
            }, fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd)  # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

    def _real_init(self, dims, values, durations):

        sys.stderr.write("Waiting to lock hyperparameter pickle...")
        self.locker.lock_wait(self.state_pkl)
        sys.stderr.write("...acquired\n")

        if os.path.exists(self.state_pkl):
            fh = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D = state['dims']
            self.ls = state['ls']
            self.amp2 = state['amp2']
            self.noise = state['noise']
            self.mean = state['mean']
            self.constraint_ls = state['constraint_ls']
            self.constraint_amp2 = state['constraint_amp2']
            self.constraint_noise = state['constraint_noise']
            self.constraint_mean = state['constraint_mean']
            # constraint_gain is not persisted by dump_hypers, so restore the
            # default used at initialization.
            self.constraint_gain = 1
            self.needs_burnin = False
        else:

            # Identify constraint violations
            goodvals = np.nonzero(values != self.bad_value)[0]

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)
            self.constraint_ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values[goodvals])
            self.constraint_amp2 = 1  #np.std(durations)

            # Initial observation noise.
            self.noise = 1e-3
            self.constraint_noise = 1e-3
            self.constraint_gain = 1

            # Initial mean.
            self.mean = np.mean(values[goodvals])
            self.constraint_mean = 0.5

        self.locker.unlock(self.state_pkl)

    def cov(self, amp2, ls, x1, x2=None):
        if x2 is None:
            return amp2 * (self.cov_func(ls, x1, None) +
                           1e-6 * np.eye(x1.shape[0]))
        else:
            return amp2 * self.cov_func(ls, x1, x2)

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values', pick the next experiment to run
    # according to the acquisition function.
    def next(self, grid, values, durations, candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete],
                            durations[complete])

        # Grab out the relevant sets.
        comp = grid[complete, :]
        cand = grid[candidates, :]
        pend = grid[pending, :]
        vals = values[complete]

        # Find which completed jobs violated constraints
        badvals = np.nonzero(vals == self.bad_value)[0]
        goodvals = np.nonzero(vals != self.bad_value)[0]
        print 'Found %d constraint violating jobs' % (badvals.shape[0])

        labels = np.zeros(vals.shape[0])
        labels[goodvals] = 1

        if comp.shape[0] < 2:
            return int(candidates[0])

        # Spray a set of candidates around the min so far
        numcand = cand.shape[0]
        best_comp = np.argmin(vals)
        cand2 = np.vstack(
            (np.random.randn(10, comp.shape[1]) * 0.001 + comp[best_comp, :],
             cand))
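        # Ten Gaussian perturbations (sigma = 0.001) of the incumbent seed
        # local refinement of EI around the best point found so far.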

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in xrange(self.burnin):
                    self.sample_constraint_hypers(comp, labels)
                    self.sample_hypers(comp[goodvals, :], vals[goodvals])
                    sys.stderr.write(
                        "BURN %d/%d] mean: %.2f  amp: %.2f "
                        "noise: %.4f  min_ls: %.4f  max_ls: %.4f\n" %
                        (mcmc_iter + 1, self.burnin, self.mean,
                         np.sqrt(self.amp2), self.noise, np.min(
                             self.ls), np.max(self.ls)))
                self.needs_burnin = False

            # Sample from hyperparameters.
            # Adjust the candidates to hit ei/sec peaks
            self.hyper_samples = []
            for mcmc_iter in xrange(self.mcmc_iters):
                self.sample_constraint_hypers(comp, labels)
                self.sample_hypers(comp[goodvals, :], vals[goodvals])
                sys.stderr.write("%d/%d] mean: %.2f  amp: %.2f  noise: %.4f "
                                 "min_ls: %.4f  max_ls: %.4f\n" %
                                 (mcmc_iter + 1, self.mcmc_iters, self.mean,
                                  np.sqrt(self.amp2), self.noise,
                                  np.min(self.ls), np.max(self.ls)))

                sys.stderr.write(
                    "%d/%d] constraint_mean: %.2f "
                    "constraint_amp: %.2f  constraint_gain: %.4f "
                    "constraint_min_ls: %.4f  constraint_max_ls: "
                    "%.4f\n" %
                    (mcmc_iter + 1, self.mcmc_iters, self.constraint_mean,
                     np.sqrt(self.constraint_amp2), self.constraint_gain,
                     np.min(self.constraint_ls), np.max(self.constraint_ls)))
            self.dump_hypers()
            comp_preds = np.zeros(labels.shape[0]).flatten()

            preds = self.pred_constraint_voilation(cand, comp,
                                                   labels).flatten()
            for ii in xrange(self.mcmc_iters):
                constraint_hyper = self.constraint_hyper_samples[ii]
                self.ff = self.ff_samples[ii]
                self.constraint_mean = constraint_hyper[0]
                self.constraint_gain = constraint_hyper[1]
                self.constraint_amp2 = constraint_hyper[2]
                self.constraint_ls = constraint_hyper[3]
                comp_preds += self.pred_constraint_voilation(
                    comp, comp, labels).flatten()
            comp_preds = comp_preds / float(self.mcmc_iters)
            print 'Predicted violation fraction: %f (%d/%d)' % (np.mean(
                preds < 0.5), np.sum(preds < 0.5), preds.shape[0])
            print 'Constraint model train accuracy: %f (%d/%d)' % (
                np.mean((comp_preds > 0.5) == labels),
                np.sum((comp_preds > 0.5) == labels), comp_preds.shape[0])

            if False:  # 2-D debugging visualization; disabled by default
                delta = 0.025
                x = np.arange(0, 1.0, delta)
                y = np.arange(0, 1.0, delta)
                X, Y = np.meshgrid(x, y)

                cpreds = np.zeros((X.shape[0], X.shape[1]))
                predei = np.zeros((X.shape[0], X.shape[1]))
                predei2 = np.zeros((X.shape[0], X.shape[1]))
                for ii in xrange(self.mcmc_iters):
                    constraint_hyper = self.constraint_hyper_samples[ii]
                    self.ff = self.ff_samples[ii]
                    self.constraint_mean = constraint_hyper[0]
                    self.constraint_gain = constraint_hyper[1]
                    self.constraint_amp2 = constraint_hyper[2]
                    self.constraint_ls = constraint_hyper[3]

                    cpred = self.pred_constraint_voilation(
                        np.hstack((X.flatten()[:, np.newaxis],
                                   Y.flatten()[:, np.newaxis])), comp, labels)
                    pei = self.compute_ei_per_s(
                        comp, pend,
                        np.hstack((X.flatten()[:, np.newaxis],
                                   Y.flatten()[:, np.newaxis])), vals, labels)
                    pei2 = self.compute_ei(
                        comp, pend,
                        np.hstack((X.flatten()[:, np.newaxis],
                                   Y.flatten()[:, np.newaxis])), vals, labels)

                    cpreds += np.reshape(cpred, (X.shape[0], X.shape[1]))
                    predei += np.reshape(pei, (X.shape[0], X.shape[1]))
                    predei2 += np.reshape(pei2, (X.shape[0], X.shape[1]))

                plt.figure(1)
                cpreds = cpreds / float(self.mcmc_iters)
                CS = plt.contour(X, Y, cpreds)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0, 0], comp[labels == 0, 1], 'rx')
                plt.plot(comp[labels == 1, 0], comp[labels == 1, 1], 'bx')
                plt.title(
                    'Contours of Classification GP (Prob of not being a constraint violation)'
                )
                plt.legend(('Constraint Violations', 'Good points'),
                           loc='lower left')
                plt.savefig('constrained_ei_chooser_class_contour.pdf')

                plt.figure(2)
                predei = predei / float(self.mcmc_iters)
                CS = plt.contour(X, Y, predei)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0, 0], comp[labels == 0, 1], 'rx')
                plt.plot(comp[labels == 1, 0], comp[labels == 1, 1], 'bx')
                plt.title('Contours of EI*P(not violating constraint)')
                plt.legend(('Constraint Violations', 'Good points'),
                           loc='lower left')
                plt.savefig('constrained_ei_chooser_eitimesprob_contour.pdf')

                plt.figure(3)
                predei2 = predei2 / float(self.mcmc_iters)
                CS = plt.contour(X, Y, predei2)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0, 0], comp[labels == 0, 1], 'rx')
                plt.plot(comp[labels == 1, 0], comp[labels == 1, 1], 'bx')
                plt.title('Contours of EI')
                plt.legend(('Constraint Violations', 'Good points'),
                           loc='lower left')
                plt.savefig('constrained_ei_chooser_ei_contour.pdf')
                plt.show()

            # Pick the top candidates to optimize over
            overall_ei = self.ei_over_hypers(comp, pend, cand2, vals, labels)
            inds = np.argsort(np.mean(overall_ei, axis=1))[-self.grid_subset:]
            cand2 = cand2[inds, :]

            # Adjust the candidates to hit ei peaks
            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            for i in xrange(0, cand2.shape[0]):
                sys.stderr.write("Optimizing candidate %d/%d\n" %
                                 (i + 1, cand2.shape[0]))
                self.check_grad_ei_per(cand2[i, :], comp, vals, labels)
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
                                        cand2[i, :].flatten(),
                                        args=(comp, vals, labels, True),
                                        bounds=b,
                                        disp=0)
                cand2[i, :] = ret[0]

            cand = np.vstack((cand, cand2))

            overall_ei = self.ei_over_hypers(comp, pend, cand, vals, labels)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))

            self.dump_hypers()
            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand, :])

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            self.optimize_hypers(comp, vals, labels)

            sys.stderr.write(
                "mean: %f  amp: %f  noise: %f "
                "min_ls: %f  max_ls: %f\n" % (self.mean, np.sqrt(
                    self.amp2), self.noise, np.min(self.ls), np.max(self.ls)))

            # Pick the top candidates to optimize over
            ei = self.compute_ei_per_s(comp, pend, cand2, vals, labels)
            inds = np.argsort(ei)[-self.grid_subset:]
            cand2 = cand2[inds, :]

            # Adjust the candidates to hit ei peaks
            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            for i in xrange(0, cand2.shape[0]):
                sys.stderr.write("Optimizing candidate %d/%d\n" %
                                 (i + 1, cand2.shape[0]))
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei,
                                        cand2[i, :].flatten(),
                                        args=(comp, vals, labels, True),
                                        bounds=b,
                                        disp=0)
                cand2[i, :] = ret[0]

            cand = np.vstack((cand, cand2))
            ei = self.compute_ei_per_s(comp, pend, cand, vals, labels)

            best_cand = np.argmax(ei)
            self.dump_hypers()

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand, :])

            return int(candidates[best_cand])

    # Predict constraint violating points
    def pred_constraint_voilation(self, cand, comp, vals):
        # The primary covariances for prediction.
        comp_cov = self.cov(self.constraint_amp2, self.constraint_ls, comp)
        cand_cross = self.cov(self.constraint_amp2, self.constraint_ls, comp,
                              cand)

        # Compute the required Cholesky.
        obsv_cov = comp_cov + self.constraint_noise * np.eye(comp.shape[0])
        obsv_chol = spla.cholesky(obsv_cov, lower=True)

        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.constraint_ls, comp, cand)

        # Predictive things.
        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True),
                               self.ff)  # - self.constraint_mean)
        beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha)  # + self.constraint_mean
        func_m = 1. / (1 + np.exp(-self.constraint_gain * func_m))
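        # func_m is now the predicted probability that a candidate does NOT
        # violate the constraint: the latent GP mean squashed through a
        # logistic with gain constraint_gain.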

        return func_m

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self, comp, pend, cand, vals, labels):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            constraint_hyper = self.constraint_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.constraint_mean = constraint_hyper[0]
            self.constraint_gain = constraint_hyper[1]
            self.constraint_amp2 = constraint_hyper[2]
            self.constraint_ls = constraint_hyper[3]
            overall_ei[:, mcmc_iter] = self.compute_ei_per_s(
                comp, pend, cand, vals, labels)

        return overall_ei

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self,
                                     cand,
                                     comp,
                                     vals,
                                     labels,
                                     compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()

        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            constraint_hyper = self.constraint_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.constraint_mean = constraint_hyper[0]
            self.constraint_gain = constraint_hyper[1]
            self.constraint_amp2 = constraint_hyper[2]
            self.constraint_ls = constraint_hyper[3]

            if compute_grad:
                (ei, g_ei) = self.grad_optimize_ei(cand, comp, vals, labels,
                                                   compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand, comp, vals, labels,
                                           compute_grad)

            summed_ei += ei

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    def check_grad_ei_per(self, cand, comp, vals, labels):
        (ei, dx1) = self.grad_optimize_ei_over_hypers(cand, comp, vals, labels)
        dx2 = dx1 * 0
        idx = np.zeros(cand.shape[0])
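        # Check the analytic gradient dx1 against central finite differences
        # dx2, perturbing one coordinate at a time by 1e-6.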
        for i in xrange(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1,
             tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, vals,
                                                      labels)
            (ei2,
             tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, vals,
                                                      labels)
            dx2[i] = (ei1 - ei2) / (2 * 1e-6)
            idx[i] = 0
        print 'computed grads', dx1
        print 'finite diffs', dx2
        print(dx1 / dx2)
        print np.sum((dx1 - dx2)**2)
        time.sleep(2)

    def grad_optimize_ei(self, cand, comp, vals, labels, compute_grad=True):
        # Here we have to compute the gradients for constrained ei
        # This means deriving through the two kernels, the one for predicting
        # constraint violations and the one predicting ei

        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))

        # First we make predictions for the durations
        # Compute covariances
        comp_constraint_cov = self.cov(self.constraint_amp2,
                                       self.constraint_ls, compfull)
        cand_constraint_cross = self.cov(self.constraint_amp2,
                                         self.constraint_ls, compfull, cand)

        # Cholesky decompositions
        obsv_constraint_cov = comp_constraint_cov + self.constraint_noise * np.eye(
            compfull.shape[0])
        obsv_constraint_chol = spla.cholesky(obsv_constraint_cov, lower=True)

        # Linear systems
        t_alpha = spla.cho_solve((obsv_constraint_chol, True),
                                 self.ff)  # - self.constraint_mean)

        # Predict marginal mean times and (possibly) variances
        func_constraint_m = np.dot(cand_constraint_cross.T, t_alpha)

        # Squash through logistic to get probabilities
        func_constraint_m = 1. / (
            1 + np.exp(-self.constraint_gain * func_constraint_m))

        # Apply covariance function
        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.constraint_ls, compfull, cand)
        grad_cross_t = np.squeeze(cand_cross_grad)

        # Now compute the gradients w.r.t. ei
        # The primary covariances for prediction.
        comp_cov = self.cov(self.amp2, self.ls, comp)
        cand_cross = self.cov(self.amp2, self.ls, comp, cand)
        comp_cov_full = self.cov(self.amp2, self.ls, compfull)
        cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

        # Compute the required Cholesky.
        obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
        obsv_chol = spla.cholesky(obsv_cov, lower=True)
        obsv_cov_full = comp_cov_full + self.noise * np.eye(compfull.shape[0])
        obsv_chol_full = spla.cholesky(obsv_cov_full, lower=True)

        # Predictive things.
        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
        #beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
        beta = spla.solve_triangular(obsv_chol_full,
                                     cand_cross_full,
                                     lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

        # Expected improvement
        func_s = np.sqrt(func_v)
        u = (best - func_m) / func_s
        ncdf = sps.norm.cdf(u)
        npdf = sps.norm.pdf(u)
        ei = func_s * (u * ncdf + npdf)

        ei_per_s = -np.sum(ei * func_constraint_m)
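        # Despite the name, this is EI weighted by the probability of
        # satisfying the constraint, negated for use with a minimizer.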
        if not compute_grad:
            return ei_per_s

        grad_constraint_xp_m = np.dot(t_alpha.transpose(), grad_cross_t)

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5 * npdf / func_s

        # Apply covariance function
        cand_cross_grad = cov_grad_func(self.ls, comp, cand)
        grad_cross = np.squeeze(cand_cross_grad)

        cand_cross_grad_full = cov_grad_func(self.ls, compfull, cand)
        grad_cross_full = np.squeeze(cand_cross_grad_full)

        grad_xp_m = np.dot(alpha.transpose(), grad_cross)
        #grad_xp_v = np.dot(-2*spla.cho_solve((obsv_chol, True),
        #                                     cand_cross).transpose(),grad_cross)
        grad_xp_v = np.dot(
            -2 * spla.cho_solve(
                (obsv_chol_full, True), cand_cross_full).transpose(),
            grad_cross_full)

        grad_xp = 0.5 * self.amp2 * (grad_xp_m * g_ei_m + grad_xp_v * g_ei_s2)
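        # Chain rule through the logistic squash below:
        #   d sigma(g*f)/dx = g * sigma(g*f) * (1 - sigma(g*f)) * df/dx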
        grad_constraint_xp_m = 0.5 * self.constraint_amp2 * self.constraint_gain * grad_constraint_xp_m * func_constraint_m * (
            1 - func_constraint_m)

        grad_xp = (func_constraint_m * grad_xp + ei * grad_constraint_xp_m)

        return ei_per_s, grad_xp.flatten()

    def compute_ei_per_s(self, comp, pend, cand, vals, labels):
        # First we make predictions for the durations as that
        # doesn't depend on pending experiments
        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Compute covariances
        comp_constraint_cov = self.cov(self.constraint_amp2,
                                       self.constraint_ls, compfull)
        cand_constraint_cross = self.cov(self.constraint_amp2,
                                         self.constraint_ls, compfull, cand)

        # Cholesky decompositions
        obsv_constraint_cov = comp_constraint_cov + self.constraint_noise * np.eye(
            compfull.shape[0])
        obsv_constraint_chol = spla.cholesky(obsv_constraint_cov, lower=True)

        # Linear systems
        t_alpha = spla.cho_solve((obsv_constraint_chol, True),
                                 self.ff)  # - self.constraint_mean)
        #t_beta   = spla.solve_triangular(obsv_constraint_chol, cand_constraint_cross, lower=True)

        # Predict marginal mean times and (possibly) variances
        func_constraint_m = (np.dot(cand_constraint_cross.T,
                                    t_alpha))  # + self.constraint_mean)

        # We don't really need the time variances now
        #func_constraint_v = self.constraint_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Squash through a logistic to get probability of not violating a constraint
        func_constraint_m = 1. / (
            1 + np.exp(-self.constraint_gain * func_constraint_m))

        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(self.amp2, self.ls, comp)
            comp_cov_full = self.cov(self.amp2, self.ls, compfull)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)
            cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_cov_full = comp_cov_full + self.noise * np.eye(
                compfull.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)
            obsv_chol_full = spla.cholesky(obsv_cov_full, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            #beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
            beta = spla.solve_triangular(obsv_chol_full,
                                         cand_cross_full,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            ei_per_s = ei * func_constraint_m
            return ei_per_s
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = self.cov(
                self.amp2, self.ls,
                comp_pend) + self.noise * np.eye(comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(self.amp2, self.ls, comp, pend)
            pend_kappa = self.cov(self.amp2, self.ls, pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = np.dot(pend_chol,
                               npr.randn(pend.shape[0],
                                         self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(self.amp2, self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return np.mean(ei, axis=1) * func_constraint_m

    def compute_ei(self, comp, pend, cand, vals, labels):
        # First we make predictions for the durations as that
        # doesn't depend on pending experiments
        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Compute covariances
        comp_constraint_cov = self.cov(self.constraint_amp2,
                                       self.constraint_ls, compfull)
        cand_constraint_cross = self.cov(self.constraint_amp2,
                                         self.constraint_ls, compfull, cand)

        # Cholesky decompositions
        obsv_constraint_cov = comp_constraint_cov + self.constraint_noise * np.eye(
            compfull.shape[0])
        obsv_constraint_chol = spla.cholesky(obsv_constraint_cov, lower=True)

        # Linear systems
        t_alpha = spla.cho_solve((obsv_constraint_chol, True),
                                 self.ff)  # - self.constraint_mean)
        #t_beta   = spla.solve_triangular(obsv_constraint_chol, cand_constraint_cross, lower=True)

        # Predict marginal mean times and (possibly) variances
        func_constraint_m = (np.dot(cand_constraint_cross.T,
                                    t_alpha))  # + self.constraint_mean)

        # We don't really need the time variances now
        #func_constraint_v = self.constraint_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Squash through a logistic to get probability of not violating a constraint
        func_constraint_m = 1. / (
            1 + np.exp(-self.constraint_gain * func_constraint_m))

        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(self.amp2, self.ls, comp)
            comp_cov_full = self.cov(self.amp2, self.ls, compfull)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)
            cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_cov_full = comp_cov_full + self.noise * np.eye(
                compfull.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)
            obsv_chol_full = spla.cholesky(obsv_cov_full, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
            #beta   = spla.solve_triangular(obsv_chol_full, cand_cross_full, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return ei
        else:
            return 0

    def sample_constraint_hypers(self, comp, labels):
        # Initialize the latent GP function values if not yet drawn
        if self.ff is None:
            comp_cov = self.cov(self.amp2, self.ls, comp)
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)
            self.ff = np.dot(obsv_chol, npr.randn(obsv_chol.shape[0]))
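            # ff is a draw from the zero-mean GP prior; it serves as the
            # starting latent function for elliptical slice sampling.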

        self._sample_constraint_noisy(comp, labels)
        self._sample_constraint_ls(comp, labels)
        self.constraint_hyper_samples.append(
            (self.constraint_mean, self.constraint_gain, self.constraint_amp2,
             self.constraint_ls))
        self.ff_samples.append(self.ff)

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)

        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov = self.amp2 * (self.cov_func(ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + self.noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp = (-np.sum(np.log(np.diag(chol))) -
                  0.5 * np.dot(vals - self.mean, solve))
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_constraint_ls(self, comp, vals):
        def lpSigmoid(ff, gain=self.constraint_gain):
            probs = 1. / (1. + np.exp(-gain * ff))
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1 - 1e-12
            llh = np.sum(vals * np.log(probs) + (1 - vals) * np.log(1 - probs))
            return llh

        def updateGain(gain):
            if gain < 0.01 or gain > 10:
                return -np.inf

            cov = self.constraint_amp2 * (
                self.cov_func(self.constraint_ls, comp, None) +
                1e-6 * np.eye(comp.shape[0])) + self.constraint_noise * np.eye(
                    comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True),
                                   vals)  # - self.constraint_mean)
            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(self.ff, solve)
            lp = lpSigmoid(self.ff, gain)

            return lp

        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.constraint_max_ls):
                return -np.inf

            cov = self.constraint_amp2 * (
                self.cov_func(ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
            ) + self.constraint_noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True),
                                   self.ff)  # - self.constraint_mean)
            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(self.ff, solve)

            lp = lpSigmoid(self.ff)

            return lp

        #hypers = util.slice_sample(np.hstack((self.constraint_ls, self.ff)), logprob, compwise=True)
        hypers = util.slice_sample(self.constraint_ls, logprob, compwise=True)
        self.constraint_ls = hypers

        cov = self.constraint_amp2 * (
            self.cov_func(self.constraint_ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + self.constraint_noise * np.eye(comp.shape[0])
        chol = spla.cholesky(cov, lower=False)
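        # Upper Cholesky U (with U^T U = cov): elliptical_slice draws
        # nu = chol.T @ randn, so nu has covariance U^T U = cov as required.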
        ff = self.ff
        for jj in xrange(20):
            (ff, lpell) = self.elliptical_slice(ff, chol, lpSigmoid)

        self.ff = ff

        # Update gain
        hypers = util.slice_sample(np.array([self.constraint_gain]),
                                   updateGain,
                                   compwise=True)
        self.constraint_gain = hypers

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale / noise)**2))
            #lp -= 0.5*(np.log(noise)/self.noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = hypers[2]

    def _sample_constraint_noisy(self, comp, vals):
        def lpSigmoid(ff, gain=self.constraint_gain):
            probs = 1. / (1. + np.exp(-gain * ff))
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1 - 1e-12
            llh = np.sum(vals * np.log(probs) + (1 - vals) * np.log(1 - probs))
            return llh

        def logprob(hypers):
            #mean  = hypers[0]
            amp2 = hypers[0]
            #gain = hypers[2]
            ff = hypers[1:]

            # This is pretty hacky, but keeps things sane.
            #if mean > np.max(vals) or mean < np.min(vals):
            #    return -np.inf

            if amp2 < 0:
                return -np.inf

            noise = self.constraint_noise
            cov = amp2 * (self.cov_func(self.constraint_ls, comp, None) +
                          1e-6 * np.eye(comp.shape[0])) + noise * np.eye(
                              comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), ff)  # - mean)
            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(ff-mean, solve)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(ff, solve)

            # Roll in noise horseshoe prior.
            #lp += np.log(np.log(1 + (self.constraint_noise_scale/noise)**2))
            #lp -= 0.5*(np.log(noise)/self.constraint_noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.constraint_amp2_scale)**2

            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(self.ff, solve)
            lp += lpSigmoid(ff, self.constraint_gain)

            return lp

        hypers = util.slice_sample(np.hstack(
            (np.array([self.constraint_amp2]), self.ff)),
                                   logprob,
                                   compwise=False)
        #self.constraint_mean  = hypers[0]
        self.constraint_amp2 = hypers[0]
        #self.constraint_gain = hypers[2]
        self.ff = hypers[1:]
        cov = self.constraint_amp2 * (
            self.cov_func(self.constraint_ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + self.constraint_noise * np.eye(comp.shape[0])
        chol = spla.cholesky(cov, lower=False)
        ff = self.ff
        for jj in xrange(50):
            (ff, lpell) = self.elliptical_slice(ff, chol, lpSigmoid)
        self.ff = ff

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf

            cov = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = 1e-3

    def elliptical_slice(self,
                         xx,
                         chol_Sigma,
                         log_like_fn,
                         cur_log_like=None,
                         angle_range=0):
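        # Elliptical slice sampling (Murray, Adams & MacKay, AISTATS 2010):
        # given a Gaussian prior N(0, Sigma) and a log-likelihood, propose
        # points on the ellipse through the current state xx and an auxiliary
        # prior draw nu, shrinking the angle bracket on each rejection.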
        D = xx.shape[0]

        if cur_log_like is None:
            cur_log_like = log_like_fn(xx)

        nu = np.dot(chol_Sigma.T, np.random.randn(D, 1)).flatten()
        hh = np.log(np.random.rand()) + cur_log_like

        # Set up a bracket of angles and pick a first proposal.
        # "phi = (theta'-theta)" is a change in angle.
        if angle_range <= 0:
            # Bracket whole ellipse with both edges at first proposed point
            phi = np.random.rand() * 2 * math.pi
            phi_min = phi - 2 * math.pi
            phi_max = phi
        else:
            # Randomly center bracket on current point
            phi_min = -angle_range * np.random.rand()
            phi_max = phi_min + angle_range
            phi = np.random.rand() * (phi_max - phi_min) + phi_min

        # Slice sampling loop
        while True:
            # Compute xx for proposed angle difference and check if it's on the slice
            xx_prop = xx * np.cos(phi) + nu * np.sin(phi)

            cur_log_like = log_like_fn(xx_prop)
            if cur_log_like > hh:
                # New point is on slice, ** EXIT LOOP **
                break

            # Shrink slice to rejected point
            if phi > 0:
                phi_max = phi
            elif phi < 0:
                phi_min = phi
            else:
                raise Exception(
                    'BUG DETECTED: Shrunk to current position and still not acceptable.'
                )

            # Propose new angle difference
            phi = np.random.rand() * (phi_max - phi_min) + phi_min

        xx = xx_prop
        return (xx, cur_log_like)

    def optimize_hypers(self, comp, vals, labels):
        # First the GP to observations
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp, vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Now the GP to times
        timegp = gp.GP(self.cov_func.__name__)
        timegp.real_init(comp.shape[1], labels)
        timegp.optimize_hypers(comp, labels)
        self.constraint_mean = timegp.mean
        self.constraint_amp2 = timegp.amp2
        self.constraint_noise = timegp.noise
        self.constraint_ls = timegp.ls

        # Save hyperparameter samples
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.constraint_hyper_samples.append(
            (self.constraint_mean, self.constraint_noise, self.constraint_amp2,
             self.constraint_ls))
        self.dump_hypers()
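
# A minimal, self-contained sketch of the elliptical slice sampling update
# used in elliptical_slice above (Murray, Adams & MacKay, 2010). The
# standalone function and the toy usage are illustrative only, and assume
# chol_Sigma is the lower Cholesky factor of the Gaussian prior covariance.
import math
import numpy as np

def _ess_step_sketch(xx, chol_Sigma, log_like_fn):
    nu = np.dot(chol_Sigma, np.random.randn(xx.shape[0]))  # ellipse auxiliary
    hh = np.log(np.random.rand()) + log_like_fn(xx)        # slice level
    phi = np.random.rand() * 2 * math.pi
    phi_min, phi_max = phi - 2 * math.pi, phi
    while True:
        xx_prop = xx * np.cos(phi) + nu * np.sin(phi)
        if log_like_fn(xx_prop) > hh:
            return xx_prop                                 # on the slice: accept
        # Shrink the angle bracket towards the current point and retry.
        if phi > 0:
            phi_max = phi
        elif phi < 0:
            phi_min = phi
        else:
            raise RuntimeError('shrunk to current point without acceptance')
        phi = np.random.rand() * (phi_max - phi_min) + phi_min

# Toy usage (hypothetical): N(0, I) prior, Gaussian pseudo-likelihood at 1.
# x = np.zeros(3)
# for _ in range(100):
#     x = _ess_step_sketch(x, np.eye(3), lambda f: -0.5 * np.sum((f - 1.0)**2))
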
class GPEIOptChooser:

    def __init__(self, expt_dir, covar="Matern52", mcmc_iters=10,
                 pending_samples=100, noiseless=False, burnin=100,
                 grid_subset=20, use_multiprocessing=True):
        self.cov_func        = getattr(gp, covar)
        self.locker          = Locker()
        self.state_pkl       = os.path.join(expt_dir, self.__module__ + ".pkl")
        self.stats_file      = os.path.join(expt_dir,
                                   self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters      = int(mcmc_iters)
        self.burnin          = int(burnin)
        self.needs_burnin    = True
        self.pending_samples = int(pending_samples)
        self.D               = -1
        self.hyper_iters     = 1        
        # Number of points to optimize EI over
        self.grid_subset     = int(grid_subset)
        self.noiseless       = bool(int(noiseless))
        self.hyper_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale  = 1    # zero-mean log normal prior
        self.max_ls      = 2    # top-hat prior on length scales

        # If multiprocessing fails or deadlocks, set this to False
        self.use_multiprocessing = bool(int(use_multiprocessing))


    def dump_hypers(self):
        self.locker.lock_wait(self.state_pkl)

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'dims'          : self.D,
                       'ls'            : self.ls,
                       'amp2'          : self.amp2,
                       'noise'         : self.noise,
                       'hyper_samples' : self.hyper_samples,
                       'mean'          : self.mean },
                     fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd) # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

        # Write the hyperparameters out to a human readable file as well
        fh    = open(self.stats_file, 'w')
        fh.write('Mean Noise Amplitude <length scales>\n')
        fh.write('-----------ALL SAMPLES-------------\n')
        meanhyps = 0*np.hstack(self.hyper_samples[0])
        for i in self.hyper_samples:
            hyps = np.hstack(i)
            meanhyps += (1/float(len(self.hyper_samples)))*hyps
            for j in hyps:
                fh.write(str(j) + ' ')
            fh.write('\n')

        fh.write('-----------MEAN OF SAMPLES-------------\n')
        for j in meanhyps:
            fh.write(str(j) + ' ')
        fh.write('\n')
        fh.close()

    # Returns HTML/JavaScript that displays interesting stats, such as the
    # length scales (the sensitivity of the objective to each dimension).
    def generate_stats_html(self):
        # Need this because the model may not necessarily be
        # initialized when this code is called.
        if not self._read_only():
            return 'Chooser not yet ready to display output'

        mean_mean  = np.mean(np.vstack([h[0] for h in self.hyper_samples]))
        mean_noise = np.mean(np.vstack([h[1] for h in self.hyper_samples]))
        mean_ls    = np.mean(np.vstack([h[3][np.newaxis,:] for h in self.hyper_samples]),0)

        try:
            output = (
                '<br /><span class=\"label label-info\">Estimated mean:</span> ' + str(mean_mean) + 
                '<br /><span class=\"label label-info\">Estimated noise:</span> ' + str(mean_noise) + 
                '<br /><br /><span class=\"label label-info\">Inverse parameter sensitivity' +
                ' - Gaussian Process length scales</span><br /><br />' +
                '<div id=\"lschart\"></div><script type=\"text/javascript\">' +
                'var lsdata = [' + ','.join(['%.2f' % i for i in mean_ls]) + '];')
        except:
            return 'Chooser not yet ready to display output.'

        output += ('bar_chart("#lschart", lsdata, ' + str(self.max_ls) + ');' +
                   '</script>')
        return output

    # Read in the chooser from file. Returns True only on success
    def _read_only(self):
        if os.path.exists(self.state_pkl):
            fh    = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D             = state['dims']
            self.ls            = state['ls']
            self.amp2          = state['amp2']
            self.noise         = state['noise']
            self.mean          = state['mean']
            self.hyper_samples = state['hyper_samples']
            self.needs_burnin  = False
            return True

        return False

    def _real_init(self, dims, values):
        self.locker.lock_wait(self.state_pkl)

        self.randomstate = npr.get_state()
        if os.path.exists(self.state_pkl):
            fh    = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D             = state['dims']
            self.ls            = state['ls']
            self.amp2          = state['amp2']
            self.noise         = state['noise']
            self.mean          = state['mean']
            self.hyper_samples = state['hyper_samples']
            self.needs_burnin  = False
        else:

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values)+1e-4

            # Initial observation noise.
            self.noise = 1e-3

            # Initial mean.
            self.mean = np.mean(values)

            # Save hyperparameter samples
            self.hyper_samples.append((self.mean, self.noise, self.amp2,
                                       self.ls))

        self.locker.unlock(self.state_pkl)

    def cov(self, x1, x2=None):
        if x2 is None:
            return self.amp2 * (self.cov_func(self.ls, x1, None)
                               + 1e-6*np.eye(x1.shape[0]))
        else:
            return self.amp2 * self.cov_func(self.ls, x1, x2)

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values', pick the next experiment to run
    # according to the acquisition function.
    def next(self, grid, values, durations,
             candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete])

        # Grab out the relevant sets.
        comp = grid[complete,:]
        cand = grid[candidates,:]
        pend = grid[pending,:]
        vals = values[complete]
        numcand = cand.shape[0]

        # Spray a set of candidates around the min so far
        best_comp = np.argmin(vals)
        cand2 = np.vstack((np.random.randn(10,comp.shape[1])*0.001 +
                           comp[best_comp,:], cand))

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in xrange(self.burnin):
                    self.sample_hypers(comp, vals)
                    log("BURN %d/%d] mean: %.2f  amp: %.2f "
                                     "noise: %.4f  min_ls: %.4f  max_ls: %.4f"
                                     % (mcmc_iter+1, self.burnin, self.mean,
                                        np.sqrt(self.amp2), self.noise,
                                        np.min(self.ls), np.max(self.ls)))
                self.needs_burnin = False

            # Sample from the hyperparameter posterior and adjust the
            # candidates to hit EI peaks.
            self.hyper_samples = []
            for mcmc_iter in xrange(self.mcmc_iters):
                self.sample_hypers(comp, vals)
                log("%d/%d] mean: %.2f  amp: %.2f  noise: %.4f "
                                 "min_ls: %.4f  max_ls: %.4f"
                                 % (mcmc_iter+1, self.mcmc_iters, self.mean,
                                    np.sqrt(self.amp2), self.noise,
                                    np.min(self.ls), np.max(self.ls)))
            self.dump_hypers()

            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            overall_ei = self.ei_over_hypers(comp,pend,cand2,vals)
            inds = np.argsort(np.mean(overall_ei,axis=1))[-self.grid_subset:]
            cand2 = cand2[inds,:]

            # Optimize each point in parallel
            if self.use_multiprocessing:
                pool = multiprocessing.Pool(self.grid_subset)
                results = [pool.apply_async(optimize_pt,args=(
                            c,b,comp,pend,vals,copy.copy(self))) for c in cand2]
                for res in results:
                    cand = np.vstack((cand, res.get(1e8)))
                pool.close()
            else:
                # Fallback: optimize each point sequentially.
                for i in xrange(0, cand2.shape[0]):
                    log("Optimizing candidate %d/%d" %
                        (i+1, cand2.shape[0]))
                    #self.check_grad_ei(cand2[i,:].flatten(), comp, pend, vals)
                    ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
                                            cand2[i,:].flatten(), args=(comp,pend,vals),
                                            bounds=b, disp=0)
                    cand2[i,:] = ret[0]
                cand = np.vstack((cand, cand2))

            overall_ei = self.ei_over_hypers(comp,pend,cand,vals)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))

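            # If the best point is one of the freshly optimized candidates,
            # return it as a brand-new point to be appended to the grid.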
            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            self.optimize_hypers(comp, vals)

            log("mean: %.2f  amp: %.2f  noise: %.4f  "
                             "min_ls: %.4f  max_ls: %.4f"
                             % (self.mean, np.sqrt(self.amp2), self.noise,
                                np.min(self.ls), np.max(self.ls)))

            # Optimize over EI
            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))

            for i in xrange(0, cand2.shape[0]):
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei,
                                        cand2[i,:].flatten(), args=(comp,vals,True),
                                        bounds=b, disp=0)
                cand2[i,:] = ret[0]
            cand = np.vstack((cand, cand2))

            ei = self.compute_ei(comp, pend, cand, vals)
            best_cand = np.argmax(ei)

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self,comp,pend,cand,vals):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]
            overall_ei[:,mcmc_iter] = self.compute_ei(comp, pend, cand,
                                                      vals)
        return overall_ei

    def check_grad_ei(self, cand, comp, pend, vals):
        (ei,dx1) = self.grad_optimize_ei_over_hypers(cand, comp, pend, vals)
        dx2 = dx1*0
        idx = np.zeros(cand.shape[0])
        for i in xrange(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1,tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, pend, vals)
            (ei2,tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, pend, vals)
            dx2[i] = (ei1 - ei2)/(2*1e-6)
            idx[i] = 0
        print 'computed grads', dx1
        print 'finite diffs', dx2
        print (dx1/dx2)
        print np.sum((dx1 - dx2)**2)
        time.sleep(2)

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self, cand, comp, pend, vals, compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()
        ls = self.ls.copy()
        amp2 = self.amp2
        mean = self.mean
        noise = self.noise

        for hyper in self.hyper_samples:
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]
            if compute_grad:
                (ei,g_ei) = self.grad_optimize_ei(cand,comp,pend,vals,compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand,comp,pend,vals,compute_grad)
            summed_ei += ei

        self.mean = mean
        self.amp2 = amp2
        self.noise = noise
        self.ls = ls.copy()

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    # Adjust points based on optimizing their ei
    def grad_optimize_ei(self, cand, comp, pend, vals, compute_grad=True):
        if pend.shape[0] == 0:
            best = np.min(vals)
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # The primary covariances for prediction.
            comp_cov   = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

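            # Gradient of the covariance w.r.t. the candidate inputs, looked
            # up by naming convention (e.g. gp.grad_Matern52 for Matern52).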
            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp, cand)

            # Predictive things.
            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u      = (best - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            if not compute_grad:
                return ei

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5*npdf / func_s

            # Apply covariance function
            grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(),grad_cross)
            grad_xp_v = np.dot(-2*spla.cho_solve(
                    (obsv_chol, True),cand_cross).transpose(), grad_cross)

            grad_xp = 0.5*self.amp2*(grad_xp_m*g_ei_m + grad_xp_v*g_ei_s2)
            ei = -np.sum(ei)

            return ei, grad_xp.flatten()

        else:
            # If there are pending experiments, fantasize their outcomes.
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov  = (self.cov(comp_pend) +
                              self.noise*np.eye(comp_pend.shape[0]))
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0],:comp.shape[0]]

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
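            # Reset the RNG to the stored state so that repeated EI
            # evaluations share the same fantasy draws (common random numbers).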
            npr.set_state(self.randomstate)
            pend_fant = np.dot(pend_chol, npr.randn(pend.shape[0],self.pending_samples)) + pend_m[:,None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:,np.newaxis],
                         (1,self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)
            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha  = spla.cho_solve((comp_pend_chol, True),
                                    fant_vals - self.mean)
            beta   = spla.solve_triangular(comp_pend_chol, cand_cross,
                                           lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:,np.newaxis])
            u      = (bests[np.newaxis,:] - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5*npdf / func_s

            # Apply covariance function
            grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(),grad_cross)
            grad_xp_v = np.dot(-2*spla.cho_solve(
                    (comp_pend_chol, True),cand_cross).transpose(), grad_cross)

            grad_xp = 0.5*self.amp2*(grad_xp_m*np.tile(g_ei_m,(comp.shape[1],1)).T + (grad_xp_v.T*g_ei_s2).T)
            ei = -np.mean(ei, axis=1)
            grad_xp = np.mean(grad_xp,axis=0)

            return ei, grad_xp.flatten()

    def compute_ei(self, comp, pend, cand, vals):
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov   = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_chol = spla.cholesky( obsv_cov, lower=True )

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u      = (best - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            return ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov  = (self.cov(comp_pend) +
                              self.noise*np.eye(comp_pend.shape[0]))
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0],:comp.shape[0]]

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            npr.set_state(self.randomstate)
            pend_fant = np.dot(pend_chol, npr.randn(pend.shape[0],self.pending_samples)) + pend_m[:,None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:,np.newaxis],
                         (1,self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)

            # Solve the linear systems.
            alpha  = spla.cho_solve((comp_pend_chol, True),
                                    fant_vals - self.mean)
            beta   = spla.solve_triangular(comp_pend_chol, cand_cross,
                                           lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:,np.newaxis])
            u      = (bests[np.newaxis,:] - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            return np.mean(ei, axis=1)

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov   = (self.amp2 * (self.cov_func(ls, comp, None) +
                1e-6*np.eye(comp.shape[0])) + self.noise*np.eye(comp.shape[0]))
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp    = (-np.sum(np.log(np.diag(chol))) -
                      0.5*np.dot(vals-self.mean, solve))
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov   = (amp2 * (self.cov_func(self.ls, comp, None) +
                1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0]))
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in noise horseshoe prior.
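            # (The horseshoe density has no closed form; this is a common
            # log(log(1 + (scale/x)**2)) approximation, up to a constant.)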
            lp += np.log(np.log(1 + (self.noise_scale/noise)**2))

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(np.sqrt(amp2))/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array(
                [self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = hypers[2]

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf

            cov   = (amp2 * (self.cov_func(self.ls, comp, None) +
                1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0]))
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(np.sqrt(amp2))/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array(
                [self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = 1e-3

    def optimize_hypers(self, comp, vals):
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp,vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Save hyperparameter samples
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.dump_hypers()

        return
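
# For reference, a standalone sketch of the closed-form expected improvement
# used throughout compute_ei above: with predictive mean m, variance v, and
# incumbent best (for minimization), EI = s * (u * Phi(u) + phi(u)) where
# u = (best - m) / s. Names below are illustrative only.
import numpy as np
import scipy.stats as sps

def _expected_improvement_sketch(func_m, func_v, best):
    func_s = np.sqrt(func_v)        # predictive standard deviation
    u = (best - func_m) / func_s    # standardized improvement
    return func_s * (u * sps.norm.cdf(u) + sps.norm.pdf(u))

# e.g. _expected_improvement_sketch(np.array([0.5]), np.array([0.04]), 0.4)
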
Example 21
class ExperimentGrid:
    @staticmethod
    def job_running(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_running(id)

    @staticmethod
    def job_complete(expt_dir, id, value, duration):
        log("setting job %d complete" % id)
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_complete(id, value, duration)
        log("set...")

    @staticmethod
    def job_broken(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_broken(id)

    def __init__(self, expt_dir, variables=None, grid_size=None, grid_seed=1):
        self.expt_dir = expt_dir
        self.jobs_pkl = os.path.join(expt_dir, EXPERIMENT_GRID_FILE)
        self.locker = Locker()

        # Only one process at a time is allowed to have access to the grid.
        self.locker.lock_wait(self.jobs_pkl)

        # Set up the grid for the first time if it doesn't exist.
        if variables is not None and not os.path.exists(self.jobs_pkl):
            self.seed = grid_seed
            self.vmap = GridMap(variables, grid_size)
            self.grid = self._hypercube_grid(self.vmap.card(), grid_size)
            self.status = np.zeros(grid_size, dtype=int) + CANDIDATE_STATE
            self.values = np.zeros(grid_size) + np.nan
            self.durs = np.zeros(grid_size) + np.nan
            self.executed = np.zeros(grid_size)
            self.proc_ids = np.zeros(grid_size, dtype=int)
            self._save_jobs()

        # Or load in the grid from the pickled file.
        else:
            self._load_jobs()

    def __del__(self):
        self._save_jobs()
        if self.locker.unlock(self.jobs_pkl):
            pass
        else:
            raise Exception("Could not release lock on job grid.\n")

    def get_grid(self):
        return self.grid, self.values, self.durs

    def get_candidates(self):
        return np.nonzero(self.status == CANDIDATE_STATE)[0]

    def get_pending(self):
        return np.nonzero((self.status == SUBMITTED_STATE)
                          | (self.status == RUNNING_STATE))[0]

    def get_complete(self):
        return np.nonzero(self.status == COMPLETE_STATE)[0]

    def get_broken(self):
        return np.nonzero(self.status == BROKEN_STATE)[0]

    def get_executed(self):
        return np.nonzero(self.executed == 1)[0]

    def get_params(self, index):
        return self.vmap.get_params(self.grid[index, :])

    def get_best(self):
        finite = self.values[np.isfinite(self.values)]
        if len(finite) > 0:
            cur_min = np.min(finite)
            index = np.nonzero(self.values == cur_min)[0][0]
            return cur_min, index
        else:
            return np.nan, -1

    def get_proc_id(self, id):
        return self.proc_ids[id]

    def add_to_grid(self, candidate):
        # Checks to prevent numerical over/underflow from corrupting the grid
        candidate[candidate > 1.0] = 1.0
        candidate[candidate < 0.0] = 0.0

        # Set up the grid
        self.grid = np.vstack((self.grid, candidate))
        self.status = np.append(self.status,
                                np.zeros(1, dtype=int) + int(CANDIDATE_STATE))

        self.values = np.append(self.values, np.zeros(1) + np.nan)
        self.durs = np.append(self.durs, np.zeros(1) + np.nan)
        self.proc_ids = np.append(self.proc_ids, np.zeros(1, dtype=int))

        # Save this out.
        self._save_jobs()
        return self.grid.shape[0] - 1

    def set_candidate(self, id):
        self.status[id] = CANDIDATE_STATE
        self._save_jobs()

    def set_submitted(self, id, proc_id):
        self.status[id] = SUBMITTED_STATE
        self.proc_ids[id] = proc_id
        self._save_jobs()

    def set_running(self, id):
        self.status[id] = RUNNING_STATE
        self._save_jobs()

    def set_complete(self, id, value, duration):
        self.status[id] = COMPLETE_STATE
        self.values[id] = value
        self.durs[id] = duration
        self.executed[id] = 1
        self._save_jobs()

    def set_broken(self, id):
        self.status[id] = BROKEN_STATE
        self._save_jobs()

    def _load_jobs(self):
        fh = open(self.jobs_pkl, 'r')
        jobs = cPickle.load(fh)
        fh.close()

        self.vmap = jobs['vmap']
        self.grid = jobs['grid']
        self.status = jobs['status']
        self.values = jobs['values']
        self.durs = jobs['durs']
        self.executed = jobs['executed']
        self.proc_ids = jobs['proc_ids']

    def _save_jobs(self):

        # Write everything to a temporary file first.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump(
            {
                'vmap': self.vmap,
                'grid': self.grid,
                'status': self.status,
                'values': self.values,
                'durs': self.durs,
                'executed': self.executed,
                'proc_ids': self.proc_ids
            },
            fh,
            protocol=-1)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.jobs_pkl)
        os.system(cmd)  # TODO: Should check system-dependent return status.

    def _hypercube_grid(self, dims, size):
        # Generate from a sobol sequence
        sobol_grid = np.transpose(i4_sobol_generate(dims, size, self.seed))

        return sobol_grid
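
# The initial design above relies on an external Sobol generator
# (i4_sobol_generate). If that module is unavailable, a plain uniform sample
# of the unit hypercube is a crude stand-in with the same (size, dims) shape,
# though it loses the low-discrepancy property. A sketch:
import numpy as np

def _uniform_hypercube_sketch(dims, size, seed=1):
    rng = np.random.RandomState(seed)
    return rng.uniform(0.0, 1.0, (size, dims))  # one row per grid point
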
Example 22
class GPEIChooser:

    def __init__(self, expt_dir, covar="Matern52", mcmc_iters=10,
                 pending_samples=100, noiseless=False):
        self.cov_func        = getattr(gp, covar)
        self.locker          = Locker()
        self.state_pkl       = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.mcmc_iters      = int(mcmc_iters)
        self.pending_samples = int(pending_samples)
        self.D               = -1
        self.hyper_iters     = 1
        self.noiseless       = bool(int(noiseless))

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale  = 1    # zero-mean log normal prior
        self.max_ls      = 2    # top-hat prior on length scales

    def __del__(self):
        self.locker.lock_wait(self.state_pkl)

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'dims'   : self.D,
                       'ls'     : self.ls,
                       'amp2'   : self.amp2,
                       'noise'  : self.noise,
                       'mean'   : self.mean },
                     fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd) # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

    def _real_init(self, dims, values):
        self.locker.lock_wait(self.state_pkl)

        if os.path.exists(self.state_pkl):
            fh    = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D     = state['dims']
            self.ls    = state['ls']
            self.amp2  = state['amp2']
            self.noise = state['noise']
            self.mean  = state['mean']
        else:

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values)+1e-4

            # Initial observation noise.
            self.noise = 1e-3

            # Initial mean.
            self.mean = np.mean(values)

        self.locker.unlock(self.state_pkl)

    def cov(self, x1, x2=None):
        if x2 is None:
            return self.amp2 * (self.cov_func(self.ls, x1, None)
                               + 1e-6*np.eye(x1.shape[0]))
        else:
            return self.amp2 * self.cov_func(self.ls, x1, x2)

    def next(self, grid, values, durations, candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete])

        # Grab out the relevant sets.
        comp = grid[complete,:]
        cand = grid[candidates,:]
        pend = grid[pending,:]
        vals = values[complete]

        if self.mcmc_iters > 0:
            # Sample from the hyperparameter posterior.

            overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))

            for mcmc_iter in xrange(self.mcmc_iters):

                self.sample_hypers(comp, vals)
                log("mean: %f  amp: %f  noise: %f  min_ls: %f  max_ls: %f"
                                 % (self.mean, np.sqrt(self.amp2), self.noise, np.min(self.ls), np.max(self.ls)))

                overall_ei[:,mcmc_iter] = self.compute_ei(comp, pend, cand, vals)

            best_cand = np.argmax(np.mean(overall_ei, axis=1))

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            try:
                self.optimize_hypers(comp, vals)
            except:
                # Initial length scales.
                self.ls = np.ones(self.D)
                # Initial amplitude.
                self.amp2 = np.std(vals)
                # Initial observation noise.
                self.noise = 1e-3
            log("mean: %f  amp: %f  noise: %f  min_ls: %f  max_ls: %f"
                             % (self.mean, np.sqrt(self.amp2), self.noise, np.min(self.ls),
                                np.max(self.ls)))

            ei = self.compute_ei(comp, pend, cand, vals)

            best_cand = np.argmax(ei)

            return int(candidates[best_cand])

    def compute_ei(self, comp, pend, cand, vals):
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov   = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_chol = spla.cholesky( obsv_cov, lower=True )

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u      = (best - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            return ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov  = self.cov(comp_pend) + self.noise*np.eye(comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0],:comp.shape[0]]

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = (np.dot(pend_chol, npr.randn(pend.shape[0],self.pending_samples))
                         + pend_m[:,None])

            # Include the fantasies.
            fant_vals = np.concatenate((np.tile(vals[:,np.newaxis],
                                                (1,self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)

            # Solve the linear systems.
            alpha  = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean)
            beta   = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:,np.newaxis])
            u      = (bests[np.newaxis,:] - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            return np.mean(ei, axis=1)

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov   = self.amp2 * (self.cov_func(ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-self.mean, solve)
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.ls, comp, None) +
                            1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale/noise)**2))

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]),
                                   logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = hypers[2]

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = 1e-3

            if amp2 < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.ls, comp, None) +
                            1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob,
                                   compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = 1e-3

    def optimize_hypers(self, comp, vals):
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp,vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Save hyperparameter samples
        #self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        #self.dump_hypers()

        return
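
# The pending-jobs branch of compute_ei above "fantasizes" outcomes by
# sampling the GP predictive at the pending points. A stripped-down sketch of
# just that step, assuming the predictive mean pend_m and covariance pend_K
# have already been computed (the toy call below is illustrative):
import numpy as np
import scipy.linalg as spla

def _fantasize_sketch(pend_m, pend_K, n_samples):
    # Joint samples m + L z, with L the lower Cholesky factor of pend_K.
    chol = spla.cholesky(pend_K, lower=True)
    z = np.random.randn(pend_m.shape[0], n_samples)
    return np.dot(chol, z) + pend_m[:, None]

# e.g. _fantasize_sketch(np.zeros(2), np.eye(2), 5) has shape (2, 5)
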
Example 23
class ExperimentGrid:

    @staticmethod
    def job_running(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_running(id)

    @staticmethod
    def job_complete(expt_dir, id, value, duration):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_complete(id, value, duration)

    @staticmethod
    def job_broken(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_broken(id)

    def __init__(self, expt_dir, variables=None, grid_size=None, grid_seed=1):
        self.expt_dir = expt_dir
        self.jobs_pkl = os.path.join(expt_dir, 'expt-grid.pkl')
        self.locker   = Locker()

        # Only one process at a time is allowed to have access to this.
        sys.stderr.write("Waiting to lock grid...")
        self.locker.lock_wait(self.jobs_pkl)
        sys.stderr.write("...acquired\n")

        # Does this exist already?
        if variables is not None and not os.path.exists(self.jobs_pkl):

            # Set up the grid for the first time.
            self.seed = grid_seed
            self.vmap   = GridMap(variables, grid_size)
            self.grid   = self._hypercube_grid(self.vmap.card(), grid_size)
            self.status = np.zeros(grid_size, dtype=int) + CANDIDATE_STATE
            self.values = np.zeros(grid_size) + np.nan
            self.durs   = np.zeros(grid_size) + np.nan
            self.sgeids = np.zeros(grid_size, dtype=int)

            # Save this out.
            self._save_jobs()
        else:

            # Load in from the pickle.
            self._load_jobs()

    def __del__(self):
        self._save_jobs()
        if self.locker.unlock(self.jobs_pkl):
            sys.stderr.write("Released lock on job grid.\n")
        else:
            raise Exception("Could not release lock on job grid.\n")

    def get_grid(self):
        return self.grid, self.values, self.durs

    def get_candidates(self):
        return np.nonzero(self.status == CANDIDATE_STATE)[0]

    def get_pending(self):
        return np.nonzero((self.status == SUBMITTED_STATE) | (self.status == RUNNING_STATE))[0]

    def get_complete(self):
        return np.nonzero(self.status == COMPLETE_STATE)[0]

    def get_broken(self):
        return np.nonzero(self.status == BROKEN_STATE)[0]

    def get_params(self, index):
        return self.vmap.get_params(self.grid[index,:])

    def get_best(self):
        finite = self.values[np.isfinite(self.values)]
        if len(finite) > 0:
            cur_min = np.min(finite)
            index   = np.nonzero(self.values==cur_min)[0][0]
            return cur_min, index
        else:
            return np.nan, -1

    def get_sgeid(self, id):
        return self.sgeids[id]

    def add_to_grid(self, candidate):
        # Set up the grid
        self.grid   = np.vstack((self.grid, candidate))
        self.status = np.append(self.status, np.zeros(1, dtype=int) + 
                                int(CANDIDATE_STATE))
        
        self.values = np.append(self.values, np.zeros(1)+np.nan)
        self.durs   = np.append(self.durs, np.zeros(1)+np.nan)
        self.sgeids = np.append(self.sgeids, np.zeros(1,dtype=int))

        # Save this out.
        self._save_jobs()
        return self.grid.shape[0]-1

    def set_candidate(self, id):
        self.status[id] = CANDIDATE_STATE
        self._save_jobs()

    def set_submitted(self, id, sgeid):
        self.status[id] = SUBMITTED_STATE
        self.sgeids[id] = sgeid
        self._save_jobs()

    def set_running(self, id):
        self.status[id] = RUNNING_STATE
        self._save_jobs()

    def set_complete(self, id, value, duration):
        self.status[id] = COMPLETE_STATE
        self.values[id] = value
        self.durs[id]   = duration
        self._save_jobs()

    def set_broken(self, id):
        self.status[id] = BROKEN_STATE
        self._save_jobs()

    def _load_jobs(self):
        fh   = open(self.jobs_pkl, 'r')
        jobs = cPickle.load(fh)
        fh.close()

        self.vmap   = jobs['vmap']
        self.grid   = jobs['grid']
        self.status = jobs['status']
        self.values = jobs['values']
        self.durs   = jobs['durs']
        self.sgeids = jobs['sgeids']

    def _save_jobs(self):

        # Write everything to a temporary file first.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'vmap'   : self.vmap,
                       'grid'   : self.grid,
                       'status' : self.status,
                       'values' : self.values,
                       'durs'   : self.durs,
                       'sgeids' : self.sgeids }, fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.jobs_pkl)
        os.system(cmd) # TODO: Should check system-dependent return status.
    
    def _hypercube_grid(self, dims, size):
        # Generate from a sobol sequence
        #sobol_grid = np.transpose(i4_sobol_generate(dims,size,self.seed))
        sobol_grid = sobol_generate(dims,size,self.seed)
        return sobol_grid
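
# The pickles above are moved into place with os.system('mv ...'), and the
# TODOs note that the return status goes unchecked. A sketch of the same
# atomic-replace pattern using only the standard library (os.rename is atomic
# on POSIX when source and destination live on the same filesystem, hence the
# dir= argument):
import os
import tempfile

def _atomic_dump_sketch(write_payload, dest_path):
    fh = tempfile.NamedTemporaryFile(
        mode='w', delete=False, dir=os.path.dirname(dest_path) or '.')
    try:
        write_payload(fh)              # e.g. lambda f: cPickle.dump(obj, f)
        fh.close()
        os.rename(fh.name, dest_path)  # atomic replace
    except Exception:
        fh.close()
        os.remove(fh.name)
        raise
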
class GPEIConstrainedChooser:

    def __init__(self, expt_dir, covar="Matern52", mcmc_iters=10, 
                 pending_samples=100, noiseless=False, burnin=100,
                 grid_subset=20, constraint_violating_value=-1):
        self.cov_func        = getattr(gp, covar)
        self.locker          = Locker()
        self.state_pkl       = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.stats_file      = os.path.join(expt_dir, 
                                   self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters      = int(mcmc_iters)
        self.burnin          = int(burnin)
        self.needs_burnin    = True
        self.pending_samples = int(pending_samples)
        self.D               = -1
        self.hyper_iters     = 1
        # Number of points to optimize EI over
        self.grid_subset     = int(grid_subset)
        self.noiseless       = bool(int(noiseless))
        self.hyper_samples   = []
        self.constraint_hyper_samples = []
        self.ff              = None
        self.ff_samples      = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale  = 1    # zero-mean log normal prior
        self.max_ls      = 2    # top-hat prior on length scales

        self.constraint_noise_scale = 0.1  # horseshoe prior
        self.constraint_amp2_scale  = 1    # zero-mean log normal prior
        self.constraint_gain        = 1    # gain on the latent constraint GP
        self.constraint_max_ls      = 2    # top-hat prior on length scales
        self.bad_value = float(constraint_violating_value)

    # A simple function to dump out hyperparameters to allow for a hot start
    # if the optimization is restarted.
    def dump_hypers(self):
        sys.stderr.write("Waiting to lock hyperparameter pickle...")
        self.locker.lock_wait(self.state_pkl)
        sys.stderr.write("...acquired\n")

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'dims'        : self.D,
                       'ls'          : self.ls,
                       'amp2'        : self.amp2,
                       'noise'       : self.noise,
                       'mean'        : self.mean,
                       'constraint_ls'     : self.constraint_ls,
                       'constraint_amp2'   : self.constraint_amp2,
                       'constraint_noise'  : self.constraint_noise,
                       'constraint_gain'   : self.constraint_gain,
                       'constraint_mean'   : self.constraint_mean },
                     fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd) # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

    def _real_init(self, dims, values, durations):
        
        sys.stderr.write("Waiting to lock hyperparameter pickle...")
        self.locker.lock_wait(self.state_pkl)
        sys.stderr.write("...acquired\n")

        if os.path.exists(self.state_pkl):            
            fh    = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D          = state['dims']
            self.ls         = state['ls']
            self.amp2       = state['amp2']
            self.noise      = state['noise']
            self.mean       = state['mean']
            self.constraint_ls    = state['constraint_ls']
            self.constraint_amp2  = state['constraint_amp2']
            self.constraint_noise = state['constraint_noise']
            self.constraint_mean  = state['constraint_mean']
            self.constraint_gain  = state.get('constraint_gain', 1)
            self.needs_burnin     = False
        else:

            # Identify constraint violations
            goodvals = np.nonzero(values != self.bad_value)[0]

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)
            self.constraint_ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values[goodvals])
            self.constraint_amp2 = 1  # np.std(durations)

            # Initial observation noise.
            self.noise = 1e-3
            self.constraint_noise = 1e-3
            self.constraint_gain = 1

            # Initial mean.
            self.mean = np.mean(values[goodvals])
            self.constraint_mean = 0.5

        self.locker.unlock(self.state_pkl)

    def cov(self, amp2, ls, x1, x2=None):
        if x2 is None:
            return amp2 * (self.cov_func(ls, x1, None) 
                           + 1e-6*np.eye(x1.shape[0]))
        else:
            return amp2 * self.cov_func(ls, x1, x2)

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values', pick from the next experiment to
    # run according to the acquisition function.
    def next(self, grid, values, durations,
             candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete],
                            durations[complete])

        # Grab out the relevant sets.
        comp = grid[complete,:]
        cand = grid[candidates,:]
        pend = grid[pending,:]
        vals = values[complete]

        # Find which completed jobs violated constraints
        badvals = np.nonzero(vals == self.bad_value)[0]
        goodvals = np.nonzero(vals != self.bad_value)[0]
        print 'Found %d constraint-violating jobs' % (badvals.shape[0])

        labels = np.zeros(vals.shape[0])
        labels[goodvals] = 1

        if comp.shape[0] < 2:
            return int(candidates[0])

        # Spray a set of candidates around the min so far 
        numcand = cand.shape[0]
        best_comp = np.argmin(vals)
        cand2 = np.vstack((np.random.randn(10,comp.shape[1])*0.001 +
                           comp[best_comp,:], cand))

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in xrange(self.burnin):
                    self.sample_constraint_hypers(comp, labels)
                    self.sample_hypers(comp[goodvals,:], vals[goodvals])
                    sys.stderr.write("BURN %d/%d] mean: %.2f  amp: %.2f "
                                     "noise: %.4f  min_ls: %.4f  max_ls: %.4f\n"
                                     % (mcmc_iter+1, self.burnin, self.mean,
                                        np.sqrt(self.amp2), self.noise,
                                        np.min(self.ls), np.max(self.ls)))
                self.needs_burnin = False
            
            # Sample from the hyperparameter posterior and adjust the
            # candidates to hit peaks of the constraint-weighted EI.
            self.hyper_samples = []
            for mcmc_iter in xrange(self.mcmc_iters):
                self.sample_constraint_hypers(comp, labels)
                self.sample_hypers(comp[goodvals,:], vals[goodvals])
                sys.stderr.write("%d/%d] mean: %.2f  amp: %.2f  noise: %.4f "
                                 "min_ls: %.4f  max_ls: %.4f\n"
                                 % (mcmc_iter+1, self.mcmc_iters, self.mean,
                                    np.sqrt(self.amp2), self.noise, 
                                    np.min(self.ls), np.max(self.ls)))

                sys.stderr.write("%d/%d] constraint_mean: %.2f "
                                 "constraint_amp: %.2f  constraint_gain: %.4f "
                                 "constraint_min_ls: %.4f  constraint_max_ls: "
                                 "%.4f\n"
                                 % (mcmc_iter+1, self.mcmc_iters, 
                                    self.constraint_mean,
                                    np.sqrt(self.constraint_amp2), 
                                    self.constraint_gain,
                                    np.min(self.constraint_ls), 
                                    np.max(self.constraint_ls)))
            self.dump_hypers()
            comp_preds = np.zeros(labels.shape[0]).flatten()
            
            preds = self.pred_constraint_voilation(cand, comp, labels).flatten()
            for ii in xrange(self.mcmc_iters):
                constraint_hyper = self.constraint_hyper_samples[ii]            
                self.ff = self.ff_samples[ii]
                self.constraint_mean = constraint_hyper[0]
                self.constraint_gain = constraint_hyper[1]
                self.constraint_amp2 = constraint_hyper[2]
                self.constraint_ls = constraint_hyper[3]
                comp_preds += self.pred_constraint_voilation(comp, comp, 
                                                             labels).flatten()
            comp_preds = comp_preds / float(self.mcmc_iters)
            print 'Predicted fraction of constraint violations: %f (%d/%d)' % (
                np.mean(preds < 0.5), np.sum(preds < 0.5), preds.shape[0])
            print 'Constraint model training accuracy: %f (%d/%d)' % (
                np.mean((comp_preds > 0.5) == labels),
                np.sum((comp_preds > 0.5) == labels), comp_preds.shape[0])

            # Debugging block (disabled): visualize the constraint GP and the
            # EI surfaces over a 2D grid.
            if False:
                delta = 0.025
                x = np.arange(0, 1.0, delta)
                y = np.arange(0, 1.0, delta)
                X, Y = np.meshgrid(x, y)

                cpreds = np.zeros((X.shape[0], X.shape[1]))
                predei = np.zeros((X.shape[0], X.shape[1]))
                predei2 = np.zeros((X.shape[0], X.shape[1]))
                for ii in xrange(self.mcmc_iters):
                    constraint_hyper = self.constraint_hyper_samples[ii]
                    self.ff = self.ff_samples[ii]
                    self.constraint_mean = constraint_hyper[0]
                    self.constraint_gain = constraint_hyper[1]
                    self.constraint_amp2 = constraint_hyper[2]
                    self.constraint_ls = constraint_hyper[3]

                    grid2d = np.hstack((X.flatten()[:, np.newaxis],
                                        Y.flatten()[:, np.newaxis]))
                    cpred = self.pred_constraint_voilation(grid2d, comp, labels)
                    pei = self.compute_ei_per_s(comp, pend, grid2d, vals, labels)
                    pei2 = self.compute_ei(comp, pend, grid2d, vals, labels)
                
                    cpreds += np.reshape(cpred, (X.shape[0], X.shape[1]))
                    predei += np.reshape(pei, (X.shape[0], X.shape[1]))
                    predei2 += np.reshape(pei2, (X.shape[0], X.shape[1]))

                plt.figure(1)
                cpreds = cpreds/float(self.mcmc_iters)
                CS = plt.contour(X,Y,cpreds)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0,0], comp[labels == 0,1], 'rx')
                plt.plot(comp[labels == 1,0], comp[labels == 1,1], 'bx')
                plt.title('Contours of Classification GP (Prob of not being a constraint violation)')
                plt.legend(('Constraint Violations', 'Good points'), loc='lower left')
                plt.savefig('constrained_ei_chooser_class_contour.pdf')

                plt.figure(2)
                predei = predei/float(self.mcmc_iters)
                CS = plt.contour(X,Y,predei)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0,0], comp[labels == 0,1], 'rx')
                plt.plot(comp[labels == 1,0], comp[labels == 1,1], 'bx')
                plt.title('Contours of EI*P(not violating constraint)')
                plt.legend(('Constraint Violations', 'Good points'), loc='lower left')
                plt.savefig('constrained_ei_chooser_eitimesprob_contour.pdf')

                plt.figure(3)
                predei2 = predei2/float(self.mcmc_iters)
                CS = plt.contour(X,Y,predei2)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0,0], comp[labels == 0,1], 'rx')
                plt.plot(comp[labels == 1,0], comp[labels == 1,1], 'bx')
                plt.title('Contours of EI')
                plt.legend(('Constraint Violations', 'Good points'), loc='lower left')
                plt.savefig('constrained_ei_chooser_ei_contour.pdf')
                plt.show()

            # Pick the top candidates to optimize over                
            overall_ei = self.ei_over_hypers(comp,pend,cand2,vals,labels)
            inds = np.argsort(np.mean(overall_ei, axis=1))[-self.grid_subset:]
            cand2 = cand2[inds,:]

            # Adjust the candidates to hit EI peaks
            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))
                
            for i in xrange(0, cand2.shape[0]):
                sys.stderr.write("Optimizing candidate %d/%d\n" %
                                 (i+1, cand2.shape[0]))
                self.check_grad_ei_per(cand2[i,:], comp, vals, labels)
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
                                        cand2[i,:].flatten(),
                                        args=(comp,vals,labels,True),
                                        bounds=b, disp=0)
                cand2[i,:] = ret[0]

            cand = np.vstack((cand, cand2))

            overall_ei = self.ei_over_hypers(comp,pend,cand,vals,labels)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))

            self.dump_hypers()
            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            self.optimize_hypers(comp, vals, labels)

            sys.stderr.write("mean: %f  amp: %f  noise: %f "
                             "min_ls: %f  max_ls: %f\n"
                             % (self.mean, np.sqrt(self.amp2),
                                self.noise, np.min(self.ls), np.max(self.ls)))

            # Pick the top candidates to optimize over
            ei = self.compute_ei_per_s(comp, pend, cand2, vals, labels)
            inds = np.argsort(ei)[-self.grid_subset:]
            cand2 = cand2[inds,:]

            # Adjust the candidates to hit ei peaks
            b = []  # optimization bounds
            for i in xrange(0, cand.shape[1]):
                b.append((0, 1))
                
            for i in xrange(0, cand2.shape[0]):
                sys.stderr.write("Optimizing candidate %d/%d\n" % 
                                 (i+1, cand2.shape[0]))
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei,
                                        cand2[i,:].flatten(),
                                        args=(comp,vals,labels,True),
                                        bounds=b, disp=0)
                cand2[i,:] = ret[0]
                
            cand = np.vstack((cand, cand2))
            ei = self.compute_ei_per_s(comp, pend, cand, vals, labels)

            best_cand = np.argmax(ei)
            self.dump_hypers()

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])
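
    # Note on the search strategy above: next() sprays a cloud of candidates
    # around the incumbent, keeps the top grid_subset by (marginalized) EI,
    # polishes each with L-BFGS-B inside the unit hypercube, and returns
    # either an index into the candidate grid or, as (numcand, point), a
    # brand-new point produced by the optimizer.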

    # Predict the probability that candidate points satisfy the constraint
    def pred_constraint_voilation(self, cand, comp, vals):
        # The primary covariances for prediction.
        comp_cov   = self.cov(self.constraint_amp2, self.constraint_ls, comp)
        cand_cross = self.cov(self.constraint_amp2, self.constraint_ls, comp, cand)

        # Compute the required Cholesky.
        obsv_cov  = comp_cov + self.constraint_noise*np.eye(comp.shape[0])
        obsv_chol = spla.cholesky(obsv_cov, lower=True)

        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.constraint_ls, comp, cand)

        # Predictive things.
        # Solve the linear systems.
        alpha  = spla.cho_solve((obsv_chol, True), self.ff)# - self.constraint_mean)
        beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha)# + self.constraint_mean
        func_m = 1./(1 + np.exp(-self.constraint_gain*func_m))

        return func_m
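
    # Illustrative sketch (not part of the original chooser): the constraint
    # model above squashes a latent GP mean through a logistic, so what it
    # returns is a probability of *not* violating the constraint. Assumes
    # numpy is imported as np, as elsewhere in this module.
    @staticmethod
    def _constraint_squash_demo():
        latent_mean = np.array([-2.0, 0.0, 2.0])  # latent means at 3 points
        gain = 1.0                                # hypothetical gain value
        probs = 1. / (1 + np.exp(-gain * latent_mean))
        return probs  # ~[0.12, 0.50, 0.88]: higher latent mean => more likely valid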

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self,comp,pend,cand,vals,labels):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            constraint_hyper = self.constraint_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.constraint_mean = constraint_hyper[0]
            self.constraint_gain = constraint_hyper[1]
            self.constraint_amp2 = constraint_hyper[2]
            self.constraint_ls = constraint_hyper[3]
            overall_ei[:,mcmc_iter] = self.compute_ei_per_s(comp, pend, cand,
                                                            vals, labels)
            
        return overall_ei

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self, cand, comp, vals, labels, compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()

        for mcmc_iter in xrange(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            constraint_hyper = self.constraint_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.constraint_mean = constraint_hyper[0]
            self.constraint_gain = constraint_hyper[1]
            self.constraint_amp2 = constraint_hyper[2]
            self.constraint_ls = constraint_hyper[3]

            if compute_grad:
                (ei,g_ei) = self.grad_optimize_ei(cand,comp,vals,labels,compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand,comp,vals,labels,compute_grad)
                
            summed_ei += ei

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    def check_grad_ei_per(self, cand, comp, vals, labels):
        (ei,dx1) = self.grad_optimize_ei_over_hypers(cand, comp, vals, labels)
        dx2 = dx1*0
        idx = np.zeros(cand.shape[0])
        for i in xrange(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1,tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, vals, labels)
            (ei2,tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, vals, labels)
            dx2[i] = (ei1 - ei2)/(2*1e-6)
            idx[i] = 0
        print 'computed grads', dx1
        print 'finite diffs', dx2
        print (dx1/dx2)
        print np.sum((dx1 - dx2)**2)
        time.sleep(2)
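
    # A minimal sketch of the central-difference rule used by the gradient
    # check above: g_i ~ (f(x + h*e_i) - f(x - h*e_i)) / (2h). Illustrative
    # helper for any scalar function f of a vector, not part of the
    # original chooser.
    @staticmethod
    def _finite_diff_grad(f, x, h=1e-6):
        g = np.zeros_like(x)
        for i in xrange(x.shape[0]):
            e = np.zeros_like(x)
            e[i] = h
            g[i] = (f(x + e) - f(x - e)) / (2 * h)
        return g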

    def grad_optimize_ei(self, cand, comp, vals, labels, compute_grad=True):
        # Here we have to compute the gradients for constrained ei
        # This means deriving through the two kernels, the one for predicting
        # constraint violations and the one predicting ei

        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))

        # First we make predictions from the constraint model
        # Compute covariances
        comp_constraint_cov   = self.cov(self.constraint_amp2, self.constraint_ls, 
                                         compfull)
        cand_constraint_cross = self.cov(self.constraint_amp2, self.constraint_ls,
                                         compfull,cand)

        # Cholesky decompositions
        obsv_constraint_cov  = comp_constraint_cov + self.constraint_noise*np.eye(
            compfull.shape[0])
        obsv_constraint_chol = spla.cholesky( obsv_constraint_cov, lower=True)

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_constraint_chol, True), 
                                  self.ff)# - self.constraint_mean)

        # Predict the marginal latent means at the candidates
        func_constraint_m = np.dot(cand_constraint_cross.T, t_alpha)

        # Squash through logistic to get probabilities
        func_constraint_m = 1./(1+np.exp(-self.constraint_gain*func_constraint_m))

        # Apply covariance function
        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.constraint_ls, compfull, cand)
        grad_cross_t = np.squeeze(cand_cross_grad)

        # Now compute the gradients w.r.t. ei
        # The primary covariances for prediction.
        comp_cov   = self.cov(self.amp2, self.ls, comp)
        cand_cross = self.cov(self.amp2, self.ls, comp, cand)
        comp_cov_full   = self.cov(self.amp2, self.ls, compfull)
        cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

        # Compute the required Cholesky.
        obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
        obsv_chol = spla.cholesky( obsv_cov, lower=True )
        obsv_cov_full  = comp_cov_full + self.noise*np.eye(compfull.shape[0])
        obsv_chol_full = spla.cholesky( obsv_cov_full, lower=True)

        # Predictive things.
        # Solve the linear systems.
        alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
        #beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
        beta   = spla.solve_triangular(obsv_chol_full, cand_cross_full, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)
        
        # Expected improvement
        func_s = np.sqrt(func_v)
        u      = (best - func_m) / func_s
        ncdf   = sps.norm.cdf(u)
        npdf   = sps.norm.pdf(u)
        ei     = func_s*(u*ncdf + npdf)

        ei_per_s = -np.sum(ei*func_constraint_m)
        if not compute_grad:
            return ei_per_s

        grad_constraint_xp_m = np.dot(t_alpha.transpose(),grad_cross_t)

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5*npdf / func_s

        # Apply covariance function
        cand_cross_grad = cov_grad_func(self.ls, comp, cand)
        grad_cross = np.squeeze(cand_cross_grad)

        cand_cross_grad_full = cov_grad_func(self.ls, compfull, cand)
        grad_cross_full = np.squeeze(cand_cross_grad_full)
        
        grad_xp_m = np.dot(alpha.transpose(),grad_cross)
        #grad_xp_v = np.dot(-2*spla.cho_solve((obsv_chol, True),
        #                                     cand_cross).transpose(),grad_cross)
        grad_xp_v = np.dot(-2*spla.cho_solve((obsv_chol_full, True),
                                             cand_cross_full).transpose(),grad_cross_full)
        
        grad_xp = 0.5*self.amp2*(grad_xp_m*g_ei_m + grad_xp_v*g_ei_s2)
        grad_constraint_xp_m = 0.5*self.constraint_amp2*self.constraint_gain*grad_constraint_xp_m*func_constraint_m*(1-func_constraint_m)

        grad_xp = (func_constraint_m*grad_xp + ei*grad_constraint_xp_m)

        return ei_per_s, grad_xp.flatten()
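
    # Illustrative sketch: the expected improvement used throughout has the
    # closed form EI = s * (u * Phi(u) + phi(u)) with u = (best - m) / s,
    # where m and s are the predictive mean and standard deviation, and Phi
    # and phi are the standard normal CDF and PDF (sps.norm here).
    @staticmethod
    def _ei_closed_form(best, func_m, func_s):
        u = (best - func_m) / func_s
        return func_s * (u * sps.norm.cdf(u) + sps.norm.pdf(u))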

    def compute_ei_per_s(self, comp, pend, cand, vals, labels):
        # First we make predictions from the constraint model, as that
        # doesn't depend on pending experiments
        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Compute covariances
        comp_constraint_cov   = self.cov(self.constraint_amp2, self.constraint_ls, 
                                         compfull)
        cand_constraint_cross = self.cov(self.constraint_amp2, self.constraint_ls,
                                         compfull,cand)

        # Cholesky decompositions
        obsv_constraint_cov  = comp_constraint_cov + self.constraint_noise*np.eye(
            compfull.shape[0])
        obsv_constraint_chol = spla.cholesky( obsv_constraint_cov, lower=True )

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_constraint_chol, True), self.ff)# - self.constraint_mean)
        #t_beta   = spla.solve_triangular(obsv_constraint_chol, cand_constraint_cross, lower=True)

        # Predict the marginal latent means at the candidates
        func_constraint_m = (np.dot(cand_constraint_cross.T, t_alpha))# + self.constraint_mean)

        # We don't need the latent variances here
        #func_constraint_v = self.constraint_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Squash through a logistic to get probability of not violating a constraint
        func_constraint_m = 1./(1+np.exp(-self.constraint_gain*func_constraint_m))
        
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov   = self.cov(self.amp2, self.ls, comp)
            comp_cov_full = self.cov(self.amp2, self.ls, compfull)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)
            cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

            # Compute the required Cholesky.
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_cov_full  = comp_cov_full + self.noise*np.eye(compfull.shape[0])
            obsv_chol = spla.cholesky( obsv_cov, lower=True )
            obsv_chol_full = spla.cholesky( obsv_cov_full, lower=True )

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            #beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
            beta   = spla.solve_triangular(obsv_chol_full, cand_cross_full,
                                           lower=True)
            
            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u      = (best - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            ei_per_s = ei*func_constraint_m
            return ei_per_s
        else:
            # If there are pending experiments, fantasize their outcomes.
            
            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov  = self.cov(self.amp2, self.ls, comp_pend) + self.noise*np.eye(comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(self.amp2, self.ls, comp, pend)
            pend_kappa = self.cov(self.amp2, self.ls, pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0],:comp.shape[0]]

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = np.dot(pend_chol, npr.randn(pend.shape[0], self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate((np.tile(vals[:,np.newaxis], 
                                                (1,self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(self.amp2, self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha  = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean)
            beta   = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:,np.newaxis])
            u      = (bests[np.newaxis,:] - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            return np.mean(ei, axis=1)*func_constraint_m
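
    # Illustrative sketch of the "fantasizing" step above: pending outcomes
    # are imputed by sampling the joint GP predictive through its Cholesky
    # factor. A minimal standalone version, assuming a predictive mean
    # pend_m and covariance pend_K are given:
    @staticmethod
    def _fantasize_demo(pend_m, pend_K, n_samples):
        chol = spla.cholesky(pend_K, lower=True)
        return pend_m[:, np.newaxis] + np.dot(
            chol, npr.randn(pend_m.shape[0], n_samples))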

    def compute_ei(self, comp, pend, cand, vals, labels):
        # First we make predictions from the constraint model, as that
        # doesn't depend on pending experiments
        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Compute covariances
        comp_constraint_cov   = self.cov(self.constraint_amp2, self.constraint_ls, 
                                         compfull)
        cand_constraint_cross = self.cov(self.constraint_amp2, self.constraint_ls,
                                         compfull,cand)

        # Cholesky decompositions
        obsv_constraint_cov  = comp_constraint_cov + self.constraint_noise*np.eye(
            compfull.shape[0])
        obsv_constraint_chol = spla.cholesky( obsv_constraint_cov, lower=True )

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_constraint_chol, True), self.ff)# - self.constraint_mean)
        #t_beta   = spla.solve_triangular(obsv_constraint_chol, cand_constraint_cross, lower=True)

        # Predict the marginal latent means at the candidates
        func_constraint_m = (np.dot(cand_constraint_cross.T, t_alpha))# + self.constraint_mean)

        # We don't need the latent variances here
        #func_constraint_v = self.constraint_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Squash through a logistic to get probability of not violating a constraint
        func_constraint_m = 1./(1+np.exp(-self.constraint_gain*func_constraint_m))
        
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov   = self.cov(self.amp2, self.ls, comp)
            comp_cov_full = self.cov(self.amp2, self.ls, compfull)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)
            cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

            # Compute the required Cholesky.
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_cov_full  = comp_cov_full + self.noise*np.eye(compfull.shape[0])
            obsv_chol = spla.cholesky( obsv_cov, lower=True )
            obsv_chol_full = spla.cholesky( obsv_cov_full, lower=True )

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
            #beta   = spla.solve_triangular(obsv_chol_full, cand_cross_full, lower=True)
            
            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u      = (best - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            return ei
        else:
            return 0

    def sample_constraint_hypers(self, comp, labels):
        # The latent GP projection
        if self.ff is None:
            comp_cov   = self.cov(self.amp2, self.ls, comp)
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_chol = spla.cholesky( obsv_cov, lower=True )
            self.ff = np.dot(obsv_chol,npr.randn(obsv_chol.shape[0]))

        self._sample_constraint_noisy(comp, labels)
        self._sample_constraint_ls(comp, labels)
        self.constraint_hyper_samples.append(
            (self.constraint_mean, self.constraint_gain,
             self.constraint_amp2, self.constraint_ls))
        self.ff_samples.append(self.ff)

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)
        
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf
            
            cov   = self.amp2 * (self.cov_func(ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp    = (-np.sum(np.log(np.diag(chol))) - 
                      0.5*np.dot(vals-self.mean, solve))
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_constraint_ls(self, comp, vals):
        def lpSigmoid(ff, gain=self.constraint_gain):
            probs = 1./(1. + np.exp(-gain*ff))
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1-1e-12
            llh = np.sum(vals*np.log(probs) + (1-vals)*np.log(1-probs))
            return llh

        def updateGain(gain):
            if gain < 0.01 or gain > 10:
                return -np.inf

            cov   = self.constraint_amp2 * (self.cov_func(self.constraint_ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.constraint_noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals)# - self.constraint_mean)
            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(self.ff, solve)
            lp   = lpSigmoid(self.ff, gain)

            return lp

        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.constraint_max_ls):
                return -np.inf
        
            cov   = self.constraint_amp2 * (self.cov_func(ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.constraint_noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), self.ff)# - self.constraint_mean)
            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(self.ff, solve)

            lp   = lpSigmoid(self.ff)

            return lp

        #hypers = util.slice_sample(np.hstack((self.constraint_ls, self.ff)), logprob, compwise=True)
        hypers = util.slice_sample(self.constraint_ls, logprob, compwise=True)
        self.constraint_ls = hypers

        cov   = self.constraint_amp2 * (self.cov_func(self.constraint_ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.constraint_noise*np.eye(comp.shape[0])
        chol  = spla.cholesky(cov, lower=False)
        ff = self.ff
        for jj in xrange(20):
            (ff, lpell) = self.elliptical_slice(ff, chol, lpSigmoid)
            
        self.ff = ff

        # Update gain
        hypers = util.slice_sample(np.array([self.constraint_gain]), updateGain, compwise=True)
        self.constraint_gain = hypers

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = hypers[2]
            
            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf
            
            if amp2 < 0 or noise < 0:
                return -np.inf
            
            cov   = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale/noise)**2))
            #lp -= 0.5*(np.log(noise)/self.noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = hypers[2]

    def _sample_constraint_noisy(self, comp, vals):
        def lpSigmoid(ff, gain=self.constraint_gain):
            probs = 1./(1. + np.exp(-gain*ff))
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1-1e-12
            llh = np.sum(vals*np.log(probs) + (1-vals)*np.log(1-probs))
            return llh

        def logprob(hypers):
            #mean  = hypers[0]
            amp2  = hypers[0]
            #gain = hypers[2]
            ff = hypers[1:]

            # This is pretty hacky, but keeps things sane.
            #if mean > np.max(vals) or mean < np.min(vals):
            #    return -np.inf

            if amp2 < 0:
                return -np.inf

            noise = self.constraint_noise
            cov   = amp2 * (self.cov_func(self.constraint_ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), ff)# - mean)
            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(ff-mean, solve)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(ff, solve)

            # Roll in noise horseshoe prior.
            #lp += np.log(np.log(1 + (self.constraint_noise_scale/noise)**2))
            #lp -= 0.5*(np.log(noise)/self.constraint_noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.constraint_amp2_scale)**2

            #lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(self.ff, solve)
            lp   += lpSigmoid(ff,self.constraint_gain)

            return lp

        hypers = util.slice_sample(np.hstack((np.array([self.constraint_amp2]), self.ff)), logprob, compwise=False)
        #self.constraint_mean  = hypers[0]
        self.constraint_amp2  = hypers[0]
        #self.constraint_gain = hypers[2]
        self.ff = hypers[1:]
        cov   = self.constraint_amp2 * (self.cov_func(self.constraint_ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.constraint_noise*np.eye(comp.shape[0])
        chol  = spla.cholesky(cov, lower=False)
        ff = self.ff
        for jj in xrange(50):
            (ff, lpell) = self.elliptical_slice(ff, chol, lpSigmoid)
        self.ff = ff

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf
            
            cov   = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = 1e-3
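
    # Illustrative sketch of univariate slice sampling, the technique behind
    # util.slice_sample used above (the project's actual utility is not
    # shown in this listing). Stepping-out variant for a single variable:
    @staticmethod
    def _slice_sample_1d(x0, logprob, width=1.0, max_shrinks=1000):
        llh = np.log(np.random.rand()) + logprob(x0)  # slice level
        lower = x0 - width * np.random.rand()
        upper = lower + width
        while logprob(lower) > llh:  # step out to the left
            lower -= width
        while logprob(upper) > llh:  # step out to the right
            upper += width
        for _ in xrange(max_shrinks):  # shrink bracket until accepted
            x1 = lower + np.random.rand() * (upper - lower)
            if logprob(x1) > llh:
                return x1
            if x1 < x0:
                lower = x1
            else:
                upper = x1
        raise Exception('Slice sampling shrank to a point without accepting.')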

    def elliptical_slice(self, xx, chol_Sigma, log_like_fn, cur_log_like=None, angle_range=0):
        D = xx.shape[0]

        if cur_log_like is None:
            cur_log_like = log_like_fn(xx)

        nu = np.dot(chol_Sigma.T, np.random.randn(D, 1)).flatten()
        hh = np.log(np.random.rand()) + cur_log_like

        # Set up a bracket of angles and pick a first proposal.
        # "phi = (theta'-theta)" is a change in angle.
        if angle_range <= 0:
            # Bracket whole ellipse with both edges at first proposed point
            phi = np.random.rand()*2*math.pi
            phi_min = phi - 2*math.pi
            phi_max = phi
        else:
            # Randomly center bracket on current point
            phi_min = -angle_range*np.random.rand()
            phi_max = phi_min + angle_range
            phi = np.random.rand()*(phi_max - phi_min) + phi_min

        # Slice sampling loop
        while True:
            # Compute xx for proposed angle difference and check if it's on the slice
            xx_prop = xx*np.cos(phi) + nu*np.sin(phi)

            cur_log_like = log_like_fn(xx_prop)
            if cur_log_like > hh:
                # New point is on slice, ** EXIT LOOP **
                break

            # Shrink slice to rejected point
            if phi > 0:
                phi_max = phi
            elif phi < 0:
                phi_min = phi
            else:
                raise Exception('BUG DETECTED: Shrunk to current position '
                                'and still not acceptable.')

            # Propose new angle difference
            phi = np.random.rand()*(phi_max - phi_min) + phi_min

        xx = xx_prop
        return (xx, cur_log_like)
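
    # Illustrative usage sketch for elliptical_slice above (not part of the
    # original chooser): draw a latent GP function under a Bernoulli/logistic
    # likelihood. Sigma is a prior covariance matrix and y holds binary
    # labels; both are hypothetical inputs for illustration.
    def _elliptical_slice_demo(self, Sigma, y, iters=100):
        chol = spla.cholesky(Sigma, lower=False)
        ff = np.dot(chol.T, np.random.randn(Sigma.shape[0]))

        def loglik(ff):
            p = np.clip(1. / (1 + np.exp(-ff)), 1e-12, 1 - 1e-12)
            return np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

        for _ in xrange(iters):
            (ff, _) = self.elliptical_slice(ff, chol, loglik)
        return ff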

    def optimize_hypers(self, comp, vals, labels):
        # First the GP to observations
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp,vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise
        
        # Now fit the constraint GP to the labels
        timegp = gp.GP(self.cov_func.__name__)
        timegp.real_init(comp.shape[1], labels)
        timegp.optimize_hypers(comp, labels)
        self.constraint_mean  = timegp.mean
        self.constraint_amp2  = timegp.amp2
        self.constraint_noise = timegp.noise
        self.constraint_ls    = timegp.ls

        # Save hyperparameter samples
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        # Keep the tuple layout consistent with ei_over_hypers:
        # (mean, gain, amp2, ls)
        self.constraint_hyper_samples.append(
            (self.constraint_mean, self.constraint_gain,
             self.constraint_amp2, self.constraint_ls))
        self.dump_hypers()
Example no. 25
import zmq
import Locker
import constraints as c
import cPickle
from commands import *

# The port to which this server will listen
PORT = '5060'
# Set up ZeroMQ to act as a socket server
context = zmq.Context()
socket = context.socket(zmq.REP)
socket.bind("tcp://0.0.0.0:%s" % PORT)


lock = Locker.Lock(300)
# In testing mode no drive commands are executed
TESTING_MODE = True
#import Controller
#from Manual_Drive import *
# create controller entity
#controller = Controller.Controller()

#manualDrive = ManualDrive(controller.start_command,controller.forward,controller.backward,controller.left,controller.right,controller.stop)



# A dictionary containing the possible commands (keys)
# A correctly formatted command: {'command':'NameCommand', ID:{}}
commands = {
'LOCK': {'nb_of_arguments': 0, 'function': func_lock},
'UNLOCK': {'nb_of_arguments': 0, 'function': func_unlock},
Example no. 26
import os
import sys
import Locker
import constraints as c
import cPickle
from commands import *
DEBUG = True
scriptPath = os.path.realpath(os.path.dirname(sys.argv[0]))
os.chdir(scriptPath)
# Append the relative location you want to import from
sys.path.append("../Socket")
import sockets_server

# Set up a new socket server
socket = sockets_server.SocketServer(6001)
socket.start()
# Create a lock entity with a lock time of 20 minutes
lock = Locker.Lock(1200)
if not DEBUG:
    # Import Controller, the entity responsible for starting and stopping controller commands
    import Controller
    # Import commands that the controller can start
    import ControllerCommands
    # Import ManualDrive, the entity that selects the right controller command for the chosen keys
    from ManualDrive import *
    # create controller entity
    print 'Start controller'
    controller = Controller.Controller()
    manualDrive = ManualDrive(
        controller.start_command, ControllerCommands.forward,
        ControllerCommands.backward, ControllerCommands.left,
        ControllerCommands.right, ControllerCommands.forward_left,
        ControllerCommands.forward_right, ControllerCommands.backward_left,
class GPEIOptChooser:
    def __init__(self,
                 expt_dir,
                 covar="Matern52",
                 mcmc_iters=10,
                 pending_samples=100,
                 noiseless=False,
                 burnin=100,
                 grid_subset=20,
                 use_multiprocessing=True):
        self.cov_func = getattr(gp, covar)
        self.locker = Locker()
        self.state_pkl = os.path.join(expt_dir, self.__module__ + ".pkl")
        self.stats_file = os.path.join(
            expt_dir, self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters = int(mcmc_iters)
        self.burnin = int(burnin)
        self.needs_burnin = True
        self.pending_samples = int(pending_samples)
        self.D = -1
        self.hyper_iters = 1
        # Number of points to optimize EI over
        self.grid_subset = int(grid_subset)
        self.noiseless = bool(int(noiseless))
        self.hyper_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale = 1  # zero-mean log normal prior
        self.max_ls = 2  # top-hat prior on length scales

        # If multiprocessing fails or deadlocks, set this to False
        self.use_multiprocessing = bool(int(use_multiprocessing))

    def dump_hypers(self):
        self.locker.lock_wait(self.state_pkl)

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
        pickle.dump(
            {
                'dims': self.D,
                'ls': self.ls,
                'amp2': self.amp2,
                'noise': self.noise,
                'hyper_samples': self.hyper_samples,
                'mean': self.mean
            }, fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd)  # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

        # Write the hyperparameters out to a human readable file as well
        fh = open(self.stats_file, 'wt')
        fh.write('Mean Noise Amplitude <length scales>\n')
        fh.write('-----------ALL SAMPLES-------------\n')
        meanhyps = 0 * np.hstack(self.hyper_samples[0])
        for i in self.hyper_samples:
            hyps = np.hstack(i)
            meanhyps += (1 / float(len(self.hyper_samples))) * hyps
            for j in hyps:
                fh.write(str(j) + ' ')
            fh.write('\n')

        fh.write('-----------MEAN OF SAMPLES-------------\n')
        for j in meanhyps:
            fh.write(str(j) + ' ')
        fh.write('\n')
        fh.close()
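
    # Illustrative alternative (an assumption, not the original code): the
    # shell `mv` above can be replaced with os.rename, which is atomic on
    # POSIX when the temp file lives on the same filesystem as the target
    # and avoids the unchecked os.system return status.
    def _dump_state_atomic_sketch(self, state):
        fh = tempfile.NamedTemporaryFile(
            mode='w+b', delete=False, dir=os.path.dirname(self.state_pkl))
        pickle.dump(state, fh)
        fh.close()
        os.rename(fh.name, self.state_pkl)  # atomic within one filesystem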

    # This passes out html or javascript to display interesting
    # stats - such as the length scales (sensitivity to various
    # dimensions).
    def generate_stats_html(self):
        # Need this because the model may not necessarily be
        # initialized when this code is called.
        if not self._read_only():
            return 'Chooser not yet ready to display output'

        mean_mean = np.mean(np.vstack([h[0] for h in self.hyper_samples]))
        mean_noise = np.mean(np.vstack([h[1] for h in self.hyper_samples]))
        mean_ls = np.mean(
            np.vstack([h[3][np.newaxis, :] for h in self.hyper_samples]), 0)

        try:
            output = (
                '<br /><span class=\"label label-info\">Estimated mean:</span> '
                + str(mean_mean) +
                '<br /><span class=\"label label-info\">Estimated noise:</span> '
                + str(mean_noise) +
                '<br /><br /><span class=\"label label-info\">Inverse parameter sensitivity'
                + ' - Gaussian Process length scales</span><br /><br />' +
                '<div id=\"lschart\"></div><script type=\"text/javascript\">' +
                'var lsdata = [' + ','.join(['%.2f' % i
                                             for i in mean_ls]) + '];')
        except Exception:
            return 'Chooser not yet ready to display output.'

        output += ('bar_chart("#lschart", lsdata, ' + str(self.max_ls) + ');' +
                   '</script>')
        return output

    # Read in the chooser from file. Returns True only on success
    def _read_only(self):
        if os.path.exists(self.state_pkl):
            fh = open(self.state_pkl, 'rb')
            state = pickle.load(fh)
            fh.close()

            self.D = state['dims']
            self.ls = state['ls']
            self.amp2 = state['amp2']
            self.noise = state['noise']
            self.mean = state['mean']
            self.hyper_samples = state['hyper_samples']
            self.needs_burnin = False
            return True

        return False

    def _real_init(self, dims, values):
        self.locker.lock_wait(self.state_pkl)

        self.randomstate = npr.get_state()
        if os.path.exists(self.state_pkl):
            fh = open(self.state_pkl, 'rb')
            state = pickle.load(fh)
            fh.close()

            self.D = state['dims']
            self.ls = state['ls']
            self.amp2 = state['amp2']
            self.noise = state['noise']
            self.mean = state['mean']
            self.hyper_samples = state['hyper_samples']
            self.needs_burnin = False
        else:

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values) + 1e-4

            # Initial observation noise.
            self.noise = 1e-3

            # Initial mean.
            self.mean = np.mean(values)

            # Save hyperparameter samples
            self.hyper_samples.append(
                (self.mean, self.noise, self.amp2, self.ls))

        self.locker.unlock(self.state_pkl)

    def cov(self, x1, x2=None):
        if x2 is None:
            return self.amp2 * (self.cov_func(self.ls, x1, None) +
                                1e-6 * np.eye(x1.shape[0]))
        else:
            return self.amp2 * self.cov_func(self.ls, x1, x2)
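
    # Illustrative sketch of what a Matern 5/2 covariance (the default
    # `covar` here) computes. The real implementation lives in the gp
    # module; this demo is an assumption for exposition, not project code.
    @staticmethod
    def _matern52_demo(ls, x1, x2):
        # k(r) = (1 + sqrt(5)*r + 5*r^2/3) * exp(-sqrt(5)*r), with r the
        # pairwise distance after scaling each dimension by its length scale.
        diff = (x1[:, np.newaxis, :] - x2[np.newaxis, :, :]) / ls
        r = np.sqrt(np.sum(diff**2, axis=2))
        return (1 + np.sqrt(5)*r + 5*r**2/3) * np.exp(-np.sqrt(5)*r)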

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values', pick from the next experiment to
    # run according to the acquisition function.
    def next(self, grid, values, durations, candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete])

        # Grab out the relevant sets.
        comp = grid[complete, :]
        cand = grid[candidates, :]
        pend = grid[pending, :]
        vals = values[complete]
        numcand = cand.shape[0]

        # Spray a set of candidates around the min so far
        best_comp = np.argmin(vals)
        cand2 = np.vstack(
            (np.random.randn(10, comp.shape[1]) * 0.001 + comp[best_comp, :],
             cand))

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in range(self.burnin):
                    self.sample_hypers(comp, vals)
                    log("BURN %d/%d] mean: %.2f  amp: %.2f "
                        "noise: %.4f  min_ls: %.4f  max_ls: %.4f" %
                        (mcmc_iter + 1, self.burnin, self.mean,
                         np.sqrt(self.amp2), self.noise, np.min(
                             self.ls), np.max(self.ls)))
                self.needs_burnin = False

            # Sample from hyperparameters.
            # Adjust the candidates to hit ei peaks
            self.hyper_samples = []
            for mcmc_iter in range(self.mcmc_iters):
                self.sample_hypers(comp, vals)
                log("%d/%d] mean: %.2f  amp: %.2f  noise: %.4f "
                    "min_ls: %.4f  max_ls: %.4f" %
                    (mcmc_iter + 1, self.mcmc_iters, self.mean,
                     np.sqrt(self.amp2), self.noise, np.min(
                         self.ls), np.max(self.ls)))
            self.dump_hypers()

            b = []  # optimization bounds
            for i in range(0, cand.shape[1]):
                b.append((0, 1))

            overall_ei = self.ei_over_hypers(comp, pend, cand2, vals)
            inds = np.argsort(np.mean(overall_ei, axis=1))[-self.grid_subset:]
            cand2 = cand2[inds, :]

            # Optimize each point in parallel
            if self.use_multiprocessing:
                pool = multiprocessing.Pool(self.grid_subset)
                results = [
                    pool.apply_async(optimize_pt,
                                     args=(c, b, comp, pend, vals,
                                           copy.copy(self))) for c in cand2
                ]
                for res in results:
                    cand = np.vstack((cand, res.get(1e8)))
                pool.close()
            else:
                # Fallback: optimize each point serially.
                for i in range(0, cand2.shape[0]):
                    log("Optimizing candidate %d/%d" % (i + 1, cand2.shape[0]))
                    #self.check_grad_ei(cand2[i,:].flatten(), comp, pend, vals)
                    ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
                                            cand2[i, :].flatten(),
                                            args=(comp, pend, vals),
                                            bounds=b,
                                            disp=0)
                    cand2[i, :] = ret[0]
                cand = np.vstack((cand, cand2))

            overall_ei = self.ei_over_hypers(comp, pend, cand, vals)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand, :])

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            self.optimize_hypers(comp, vals)

            log("mean: %.2f  amp: %.2f  noise: %.4f  "
                "min_ls: %.4f  max_ls: %.4f" % (self.mean, np.sqrt(
                    self.amp2), self.noise, np.min(self.ls), np.max(self.ls)))

            # Optimize over EI
            b = []  # optimization bounds
            for i in range(0, cand.shape[1]):
                b.append((0, 1))

            for i in range(0, cand2.shape[0]):
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei,
                                        cand2[i, :].flatten(),
                                        args=(comp, vals, True),
                                        bounds=b,
                                        disp=0)
                cand2[i, :] = ret[0]
            cand = np.vstack((cand, cand2))

            ei = self.compute_ei(comp, pend, cand, vals)
            best_cand = np.argmax(ei)

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand, :])

            return int(candidates[best_cand])

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self, comp, pend, cand, vals):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in range(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]
            overall_ei[:, mcmc_iter] = self.compute_ei(comp, pend, cand, vals)
        return overall_ei
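
    # Note: averaging EI across the stored MCMC hyperparameter samples, as
    # above, is a Monte Carlo estimate of the integrated acquisition
    # function; next() then ranks candidates by np.mean(overall_ei, axis=1).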

    def check_grad_ei(self, cand, comp, pend, vals):
        (ei, dx1) = self.grad_optimize_ei_over_hypers(cand, comp, pend, vals)
        dx2 = dx1 * 0
        idx = np.zeros(cand.shape[0])
        for i in range(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1,
             tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, pend,
                                                      vals)
            (ei2,
             tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, pend,
                                                      vals)
            dx2[i] = (ei1 - ei2) / (2 * 1e-6)
            idx[i] = 0
        print('computed grads', dx1)
        print('finite diffs', dx2)
        print((dx1 / dx2))
        print(np.sum((dx1 - dx2)**2))
        time.sleep(2)

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self,
                                     cand,
                                     comp,
                                     pend,
                                     vals,
                                     compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()
        ls = self.ls.copy()
        amp2 = self.amp2
        mean = self.mean
        noise = self.noise

        for hyper in self.hyper_samples:
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]
            if compute_grad:
                (ei, g_ei) = self.grad_optimize_ei(cand, comp, pend, vals,
                                                   compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand, comp, pend, vals,
                                           compute_grad)
            summed_ei += ei

        self.mean = mean
        self.amp2 = amp2
        self.noise = noise
        self.ls = ls.copy()

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    # Adjust points based on optimizing their ei
    def grad_optimize_ei(self, cand, comp, pend, vals, compute_grad=True):
        if pend.shape[0] == 0:
            best = np.min(vals)
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp, cand)

            # Predictive things.
            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            if not compute_grad:
                return ei

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5 * npdf / func_s

            # Apply covariance function
            grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(), grad_cross)
            grad_xp_v = np.dot(
                -2 * spla.cho_solve((obsv_chol, True), cand_cross).transpose(),
                grad_cross)

            grad_xp = 0.5 * self.amp2 * (grad_xp_m * g_ei_m +
                                         grad_xp_v * g_ei_s2)
            ei = -np.sum(ei)

            return ei, grad_xp.flatten()

        else:
            # If there are pending experiments, fantasize their outcomes.
            cand = np.reshape(cand, (-1, comp.shape[1]))

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = (self.cov(comp_pend) +
                             self.noise * np.eye(comp_pend.shape[0]))
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            npr.set_state(self.randomstate)
            pend_fant = np.dot(
                pend_chol, npr.randn(pend.shape[0],
                                     self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)
            cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            # Gradients of ei w.r.t. mean and variance
            g_ei_m = -ncdf
            g_ei_s2 = 0.5 * npdf / func_s

            # Apply covariance function
            # Squeeze can break the 1D case, so be careful
            if pend.shape[1] == 1:
                grad_cross = np.squeeze(cand_cross_grad, axis=(2, ))
            else:
                grad_cross = np.squeeze(cand_cross_grad)

            grad_xp_m = np.dot(alpha.transpose(), grad_cross)
            grad_xp_v = np.dot(
                -2 * spla.cho_solve(
                    (comp_pend_chol, True), cand_cross).transpose(),
                grad_cross)

            grad_xp = 0.5 * self.amp2 * (
                grad_xp_m * np.tile(g_ei_m, (comp.shape[1], 1)).T +
                (grad_xp_v.T * g_ei_s2).T)
            ei = -np.mean(ei, axis=1)
            grad_xp = np.mean(grad_xp, axis=0)

            return ei, grad_xp.flatten()

    def compute_ei(self, comp, pend, cand, vals):
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = (self.cov(comp_pend) +
                             self.noise * np.eye(comp_pend.shape[0]))
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            npr.set_state(self.randomstate)
            pend_fant = np.dot(
                pend_chol, npr.randn(pend.shape[0],
                                     self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)
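            # Each column of fant_vals is one joint posterior draw of the
            # pending outcomes stacked beneath the observed values, so each
            # entry of bests is the incumbent under one fantasy; the EI below
            # is averaged over these fantasies.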

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return np.mean(ei, axis=1)

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov = (
                self.amp2 *
                (self.cov_func(ls, comp, None) + 1e-6 * np.eye(comp.shape[0]))
                + self.noise * np.eye(comp.shape[0]))
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp = (-np.sum(np.log(np.diag(chol))) -
                  0.5 * np.dot(vals - self.mean, solve))
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov = (amp2 * (self.cov_func(self.ls, comp, None) +
                           1e-6 * np.eye(comp.shape[0])) +
                   noise * np.eye(comp.shape[0]))
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale / noise)**2))
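            # (log(1 + (scale/x)^2) is a standard analytic surrogate for the
            # horseshoe density, which has no closed form; the outer log
            # turns it into a log-prior term.)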

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(np.sqrt(amp2)) / self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = hypers[2]

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf

            cov = (amp2 * (self.cov_func(self.ls, comp, None) +
                           1e-6 * np.eye(comp.shape[0])) +
                   noise * np.eye(comp.shape[0]))
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(np.sqrt(amp2)) / self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = 1e-3

    def optimize_hypers(self, comp, vals):
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp, vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Save hyperparameter samples
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.dump_hypers()

        return
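
A minimal, self-contained sketch of the closed-form expected improvement
that the chooser above evaluates (for minimization, under a Gaussian
predictive distribution); the standalone function name is illustrative and
not part of the module above:

import numpy as np
import scipy.stats as sps

def expected_improvement(best, func_m, func_v):
    # EI for minimization: E[max(best - f, 0)] with f ~ N(func_m, func_v),
    # i.e. s * (u * Phi(u) + phi(u)) where u = (best - m) / s.
    func_s = np.sqrt(func_v)
    u = (best - func_m) / func_s
    return func_s * (u * sps.norm.cdf(u) + sps.norm.pdf(u))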
Example n. 28
class ExperimentGrid:

    @staticmethod
    def job_running(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_running(id)

    @staticmethod
    def job_complete(expt_dir, id, value, duration):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_complete(id, value, duration)

    @staticmethod
    def job_broken(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_broken(id)

    def __init__(self, expt_dir, variables=None, grid_size=None, grid_seed=1):
        self.expt_dir = expt_dir
        self.jobs_pkl = os.path.join(expt_dir, 'expt-grid.pkl')
        self.locker   = Locker()

        # Only one process at a time is allowed to have access to this.
        sys.stderr.write("Waiting to lock grid...")
        self.locker.lock_wait(self.jobs_pkl)
        sys.stderr.write("...acquired\n")

        # Does this exist already?
        if variables is not None and not os.path.exists(self.jobs_pkl):

            # Set up the grid for the first time.
            self.seed = grid_seed
            self.vmap   = GridMap(variables, grid_size)
            self.grid   = self._hypercube_grid(self.vmap.card(), grid_size)
            self.status = np.zeros(grid_size, dtype=int) + CANDIDATE_STATE
            self.values = np.zeros(grid_size) + np.nan
            self.durs   = np.zeros(grid_size) + np.nan
            self.sgeids = np.zeros(grid_size, dtype=int)

            # Save this out.
            self._save_jobs()
        else:

            # Load in from the pickle.
            self._load_jobs()

    def __del__(self):
        self._save_jobs()
        if self.locker.unlock(self.jobs_pkl):
            sys.stderr.write("Released lock on job grid.\n")
        else:
            raise Exception("Could not release lock on job grid.\n")

    def get_grid(self):
        return self.grid, self.values, self.durs

    def get_candidates(self):
        return np.nonzero(self.status == CANDIDATE_STATE)[0]

    def get_pending(self):
        return np.nonzero((self.status == SUBMITTED_STATE) | (self.status == RUNNING_STATE))[0]

    def get_complete(self):
        return np.nonzero(self.status == COMPLETE_STATE)[0]

    def get_broken(self):
        return np.nonzero(self.status == BROKEN_STATE)[0]

    def get_params(self, index):
        return self.vmap.get_params(self.grid[index,:])

    def get_best(self):
        finite = self.values[np.isfinite(self.values)]
        if len(finite) > 0:
            cur_min = np.min(finite)
            index   = np.nonzero(self.values==cur_min)[0][0]
            return cur_min, index
        else:
            return np.nan, -1

    def get_sgeid(self, id):
        return self.sgeids[id]

    def add_to_grid(self, candidate):
        # Set up the grid
        self.grid   = np.vstack((self.grid, candidate))
        self.status = np.append(self.status, np.zeros(1, dtype=int) + 
                                int(CANDIDATE_STATE))
        
        self.values = np.append(self.values, np.zeros(1)+np.nan)
        self.durs   = np.append(self.durs, np.zeros(1)+np.nan)
        self.sgeids = np.append(self.sgeids, np.zeros(1,dtype=int))

        # Save this out.
        self._save_jobs()
        return self.grid.shape[0]-1

    def set_candidate(self, id):
        self.status[id] = CANDIDATE_STATE
        self._save_jobs()

    def set_submitted(self, id, sgeid):
        self.status[id] = SUBMITTED_STATE
        self.sgeids[id] = sgeid
        self._save_jobs()

    def set_running(self, id):
        self.status[id] = RUNNING_STATE
        self._save_jobs()

    def set_complete(self, id, value, duration):
        self.status[id] = COMPLETE_STATE
        self.values[id] = value
        self.durs[id]   = duration
        self._save_jobs()

    def set_broken(self, id):
        self.status[id] = BROKEN_STATE
        self._save_jobs()

    def _load_jobs(self):
        fh   = open(self.jobs_pkl, 'r')
        jobs = cPickle.load(fh)
        fh.close()

        self.vmap   = jobs['vmap']
        self.grid   = jobs['grid']
        self.status = jobs['status']
        self.values = jobs['values']
        self.durs   = jobs['durs']
        self.sgeids = jobs['sgeids']

    def _save_jobs(self):

        # Write everything to a temporary file first.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'vmap'   : self.vmap,
                       'grid'   : self.grid,
                       'status' : self.status,
                       'values' : self.values,
                       'durs'   : self.durs,
                       'sgeids' : self.sgeids }, fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.jobs_pkl)
        os.system(cmd) # TODO: Should check system-dependent return status.
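        # (A hypothetical alternative: os.rename(fh.name, self.jobs_pkl) would
        # avoid the shell, but fails when the temp file lands on a different
        # filesystem, which is one reason to shell out to mv here.)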
    
    def _hypercube_grid(self, dims, size):
        # Generate from a Sobol sequence.
        sobol_grid = np.transpose(i4_sobol_generate(dims, size, self.seed))

        return sobol_grid
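
A hypothetical usage sketch of the ExperimentGrid class above (the
'variables' argument follows whatever layout GridMap expects; the directory,
ids, and numbers here are made up):

expt_grid = ExperimentGrid('/tmp/my-expt', variables=variables, grid_size=1000)
job_id = expt_grid.get_candidates()[0]        # an unclaimed grid index
params = expt_grid.get_params(job_id)         # decode the unit-cube point
expt_grid.set_submitted(job_id, 12345)        # record the scheduler id
expt_grid.set_running(job_id)
expt_grid.set_complete(job_id, 0.42, 3.1)     # objective value, duration
best_val, best_id = expt_grid.get_best()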
Example n. 29
def reset():
    # Kill running workers.
    if os.path.exists('expt-grid.pkl'):
        try:
            locker = Locker.Locker()
            locker.lock(
                os.path.join(os.path.realpath('.'), 'expt-grid.pkllock'))
            with open('expt-grid.pkl', 'r') as f:
                expt_grid = pickle.load(f)
            for proc_ind in xrange(expt_grid['sgeids'].shape[0]):
                if expt_grid['status'][
                        proc_ind] == ExperimentGrid.RUNNING_STATE:
                    print 'Killing process with id: %s' % expt_grid['sgeids'][
                        proc_ind]
                    try:
                        subprocess.check_call('taskkill /PID %s /F /T' %
                                              expt_grid['sgeids'][proc_ind])
                    except:
                        print 'Couldnt kill process with id: %s' % expt_grid[
                            'sgeids'][proc_ind]
        except Exception as e:
            print 'Couldnt clean up processes: %s.' % e.message
    # Clean up.
    # Jobs.
    if os.path.exists('jobs'):
        try:
            shutil.rmtree('jobs')
        except:
            print 'Couldnt remove jobs folder'
    # Outputs.
    if os.path.exists('output'):
        try:
            shutil.rmtree('output')
        except:
            try:
                time.sleep(5)
                shutil.rmtree('output')
            except:
                print 'Couldnt remove output folder'
    # Best result.
    if os.path.exists('best_job_and_result.txt'):
        try:
            os.remove('best_job_and_result.txt')
        except:
            print 'Couldnt remove best job file'
    # Experiment grid.
    if os.path.exists("expt-grid.pkl"):
        try:
            os.remove('expt-grid.pkl')
        except:
            print 'Couldnt remove experiment grid.'
    # GPEIOptChooser files.
    if os.path.exists('GPEIOptChooser.pkl'):
        try:
            os.remove('GPEIOptChooser.pkl')
            os.remove('GPEIOptChooser_hyperparameters.txt')
        except:
            print 'Couldnt remove GPEIOptChooser files.'
    # Trace.
    if os.path.exists('trace.csv'):
        try:
            os.remove('trace.csv')
        except:
            print 'Couldnt remove trace file'
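
The 'output' branch above retries the rmtree once after a short sleep, since
Windows can hold handles on recently used files. A small hypothetical helper
that generalizes that retry pattern:

import shutil
import time

def rmtree_with_retry(path, retries=3, delay=5):
    # Try shutil.rmtree a few times, sleeping between attempts,
    # to ride out transient file locks.
    for _ in range(retries):
        try:
            shutil.rmtree(path)
            return True
        except OSError:
            time.sleep(delay)
    return False
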
class GPConstrainedEIChooser:
    def __init__(
        self,
        expt_dir,
        covar="Matern52",
        mcmc_iters=20,
        pending_samples=100,
        noiseless=False,
        burnin=100,
        grid_subset=20,
        constraint_violating_value=np.inf,
        verbosity=0,
        visualize2D=False,
    ):
        self.cov_func = getattr(gp, covar)
        self.locker = Locker()
        self.state_pkl = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.stats_file = os.path.join(expt_dir, self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters = int(mcmc_iters)
        self.burnin = int(burnin)
        self.needs_burnin = True
        self.pending_samples = pending_samples
        self.D = -1
        self.hyper_iters = 1
        # Number of points to optimize EI over
        self.grid_subset = int(grid_subset)
        self.noiseless = bool(int(noiseless))
        self.hyper_samples = []
        self.constraint_hyper_samples = []
        self.ff = None
        self.ff_samples = []
        self.verbosity = int(verbosity)

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale = 1  # zero-mean log normal prior
        self.max_ls = 2  # top-hat prior on length scales

        self.constraint_noise_scale = 0.1  # horseshoe prior
        self.constraint_amp2_scale = 1  # zero-mean log normal prior
        self.constraint_gain = 1  # top-hat prior on the probit gain
        self.constraint_max_ls = 2  # top-hat prior on length scales
        self.bad_value = float(constraint_violating_value)
        self.visualize2D = visualize2D

    # A simple function to dump out hyperparameters to allow for a hot start
    # if the optimization is restarted.
    def dump_hypers(self):
        self.locker.lock_wait(self.state_pkl)

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode="wb", delete=False)
        pickle.dump(
            {
                "dims": self.D,
                "ls": self.ls,
                "amp2": self.amp2,
                "noise": self.noise,
                "mean": self.mean,
                "constraint_ls": self.constraint_ls,
                "constraint_amp2": self.constraint_amp2,
                "constraint_noise": self.constraint_noise,
                "constraint_mean": self.constraint_mean,
            },
            fh,
        )
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd)  # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

        # Write the hyperparameters out to a human readable file as well
        fh = open(self.stats_file, "w")
        fh.write("Mean Noise Amplitude <length scales>\n")
        fh.write("-----------ALL SAMPLES-------------\n")
        meanhyps = 0 * np.hstack(self.hyper_samples[0])
        for i in self.hyper_samples:
            hyps = np.hstack(i)
            meanhyps += (1 / float(len(self.hyper_samples))) * hyps
            for j in hyps:
                fh.write(str(j) + " ")
            fh.write("\n")

        fh.write("-----------MEAN OF SAMPLES-------------\n")
        for j in meanhyps:
            fh.write(str(j) + " ")
        fh.write("\n")
        fh.close()

    def _real_init(self, dims, values, durations):

        self.locker.lock_wait(self.state_pkl)

        self.randomstate = npr.get_state()
        if os.path.exists(self.state_pkl):
            fh = open(self.state_pkl, "rb")
            state = pickle.load(fh)
            fh.close()

            self.D = state["dims"]
            self.ls = state["ls"]
            self.amp2 = state["amp2"]
            self.noise = state["noise"]
            self.mean = state["mean"]
            self.constraint_ls = state["constraint_ls"]
            self.constraint_amp2 = state["constraint_amp2"]
            self.constraint_noise = state["constraint_noise"]
            self.constraint_mean = state["constraint_mean"]
            self.constraint_gain = state["constraint_gain"]
            self.needs_burnin = False
        else:

            # Identify constraint violations. NaNs and Infs are treated as
            # violations, as is the optional user-defined bad value.
            goodvals = np.nonzero(np.logical_and(values != self.bad_value, np.isfinite(values)))[0]

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)
            self.constraint_ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values[goodvals]) + 1e-4
            self.constraint_amp2 = 1.0

            # Initial observation noise.
            self.noise = 1e-3
            self.constraint_noise = 1e-3
            self.constraint_gain = 1

            # Initial mean.
            self.mean = np.mean(values[goodvals])
            self.constraint_mean = 0.5

        self.locker.unlock(self.state_pkl)

    def cov(self, amp2, ls, x1, x2=None):
        if x2 is None:
            return amp2 * (self.cov_func(ls, x1, None) + 1e-6 * np.eye(x1.shape[0]))
        else:
            return amp2 * self.cov_func(ls, x1, x2)
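
    # For illustration (hypothetical shapes, not from the original source):
    #   K   = self.cov(amp2, ls, X)       -> (n, n) kernel matrix with jitter
    #   Kxs = self.cov(amp2, ls, X, Xs)   -> (n, m) cross-covariance, no jitter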

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values', pick the next experiment to
    # run according to the acquisition function.
    def next(self, grid, values, durations, candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Grab out the relevant sets.
        comp = grid[complete, :]
        cand = grid[candidates, :]
        pend = grid[pending, :]
        vals = values[complete]

        # Identify constraint violations. NaNs and Infs are treated as
        # violations, as is the optional user-defined bad value.
        idx = np.logical_and(vals != self.bad_value, np.isfinite(vals))
        goodvals = np.nonzero(idx)[0]
        badvals = np.nonzero(np.logical_not(idx))[0]

        print("Found %d constraint violating jobs" % (badvals.shape[0]))

        # There's no point regressing on one observation
        print("Received %d valid results" % (goodvals.shape[0]))
        if goodvals.shape[0] < 2:
            return int(candidates[0])

        labels = np.zeros(vals.shape[0])
        labels[goodvals] = 1

        if np.sum(labels) < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete], durations[complete])

        # Spray a set of candidates around the min so far
        numcand = cand.shape[0]
        best_comp = np.argmin(vals)
        cand2 = np.vstack((np.random.randn(10, comp.shape[1]) * 0.001 + comp[best_comp, :], cand))

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in range(self.burnin):
                    self.sample_constraint_hypers(comp, labels)
                    self.sample_hypers(comp[goodvals, :], vals[goodvals])
                    log(
                        "BURN %d/%d] mean: %.2f  amp: %.2f "
                        "noise: %.4f  min_ls: %.4f  max_ls: %.4f"
                        % (
                            mcmc_iter + 1,
                            self.burnin,
                            self.mean,
                            np.sqrt(self.amp2),
                            self.noise,
                            np.min(self.ls),
                            np.max(self.ls),
                        )
                    )
                self.needs_burnin = False

            # Sample from hyperparameters.
            # Adjust the candidates to hit ei/sec peaks
            self.hyper_samples = []
            for mcmc_iter in range(self.mcmc_iters):
                self.sample_constraint_hypers(comp, labels)
                self.sample_hypers(comp[goodvals, :], vals[goodvals])
                if self.verbosity > 0:
                    log(
                        "%d/%d] mean: %.2f  amp: %.2f noise: %.4f "
                        "min_ls: %.4f  max_ls: %.4f"
                        % (
                            mcmc_iter + 1,
                            self.mcmc_iters,
                            self.mean,
                            np.sqrt(self.amp2),
                            self.noise,
                            np.min(self.ls),
                            np.max(self.ls),
                        )
                    )

                    log(
                        "%d/%d] constraint_mean: %.2f "
                        "constraint_amp: %.2f "
                        "constraint_gain: %.4f "
                        "constraint_min_ls: %.4f "
                        "constraint_max_ls: "
                        "%.4f"
                        % (
                            mcmc_iter + 1,
                            self.mcmc_iters,
                            self.constraint_mean,
                            np.sqrt(self.constraint_amp2),
                            self.constraint_gain,
                            np.min(self.constraint_ls),
                            np.max(self.constraint_ls),
                        )
                    )
            self.dump_hypers()
            comp_preds = np.zeros(labels.shape[0]).flatten()

            preds = self.pred_constraint_voilation(cand, comp, labels).flatten()
            for ii in range(self.mcmc_iters):
                constraint_hyper = self.constraint_hyper_samples[ii]
                self.ff = self.ff_samples[ii]
                self.constraint_mean = constraint_hyper[0]
                self.constraint_gain = constraint_hyper[1]
                self.constraint_amp2 = constraint_hyper[2]
                self.constraint_ls = constraint_hyper[3]
                comp_preds += self.pred_constraint_voilation(comp, comp, labels).flatten()
            comp_preds = comp_preds / float(self.mcmc_iters)
            print(
                "Predicting %.2f%% constraint violations (%d/%d): "
                % (np.mean(preds < 0.5) * 100, np.sum(preds < 0.5), preds.shape[0])
            )
            if self.verbosity > 0:
                print(
                    "Prediction` %f%% train accuracy (%d/%d): "
                    % (np.mean((comp_preds > 0.5) == labels), np.sum((comp_preds > 0.5) == labels), comp_preds.shape[0])
                )

            if self.visualize2D:
                delta = 0.025
                x = np.arange(0, 1.0, delta)
                y = np.arange(0, 1.0, delta)
                X, Y = np.meshgrid(x, y)

                cpreds = np.zeros((X.shape[0], X.shape[1]))
                predei = np.zeros((X.shape[0], X.shape[1]))
                predei2 = np.zeros((X.shape[0], X.shape[1]))
                for ii in range(self.mcmc_iters):
                    constraint_hyper = self.constraint_hyper_samples[ii]
                    self.ff = self.ff_samples[ii]
                    self.constraint_mean = constraint_hyper[0]
                    self.constraint_gain = constraint_hyper[1]
                    self.constraint_amp2 = constraint_hyper[2]
                    self.constraint_ls = constraint_hyper[3]

                    cpred = self.pred_constraint_voilation(
                        np.hstack((X.flatten()[:, np.newaxis], Y.flatten()[:, np.newaxis])), comp, labels
                    )
                    pei = self.compute_constrained_ei(
                        comp, pend, np.hstack((X.flatten()[:, np.newaxis], Y.flatten()[:, np.newaxis])), vals, labels
                    )
                    pei2 = self.compute_ei(
                        comp, pend, np.hstack((X.flatten()[:, np.newaxis], Y.flatten()[:, np.newaxis])), vals, labels
                    )

                    cpreds += np.reshape(cpred, (X.shape[0], X.shape[1]))
                    predei += np.reshape(pei, (X.shape[0], X.shape[1]))
                    predei2 += np.reshape(pei2, (X.shape[0], X.shape[1]))

                plt.figure(1)
                plt.clf()
                cpreds = cpreds / float(self.mcmc_iters)
                CS = plt.contour(X, Y, cpreds)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0, 0], comp[labels == 0, 1], "rx")
                plt.plot(comp[labels == 1, 0], comp[labels == 1, 1], "bx")
                plt.title("Contours of Classification GP (Prob of not being a " "constraint violation)")
                plt.legend(("Constraint Violations", "Good points"), "lower left")
                plt.savefig("constrained_ei_chooser_class_contour.pdf")

                plt.figure(2)
                plt.clf()
                predei = predei / float(self.mcmc_iters)
                CS = plt.contour(X, Y, predei)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0, 0], comp[labels == 0, 1], "rx")
                plt.plot(comp[labels == 1, 0], comp[labels == 1, 1], "bx")
                plt.title("Contours of EI*P(not violating constraint)")
                plt.legend(("Constraint Violations", "Good points"), "lower left")
                plt.savefig("constrained_ei_chooser_eitimesprob_contour.pdf")

                plt.figure(3)
                plt.clf()
                predei2 = predei2 / float(self.mcmc_iters)
                CS = plt.contour(X, Y, predei2)
                plt.clabel(CS, inline=1, fontsize=10)
                plt.plot(comp[labels == 0, 0], comp[labels == 0, 1], "rx")
                plt.plot(comp[labels == 1, 0], comp[labels == 1, 1], "bx")
                plt.title("Contours of EI")
                plt.legend(("Constraint Violations", "Good points"), "lower left")
                plt.savefig("constrained_ei_chooser_ei_contour.pdf")
                # plt.show()

            # Pick the top candidates to optimize over
            overall_ei = self.ei_over_hypers(comp, pend, cand2, vals, labels)
            inds = np.argsort(np.mean(overall_ei, axis=1))[-self.grid_subset :]
            cand2 = cand2[inds, :]

            # Adjust the candidates to hit ei peaks
            b = []  # optimization bounds
            for i in range(0, cand.shape[1]):
                b.append((0, 1))

            # Optimize each point in parallel
            pool = multiprocessing.Pool(self.grid_subset)
            results = [
                pool.apply_async(optimize_pt, args=(c, b, comp, pend, vals, labels, copy.copy(self))) for c in cand2
            ]
            for res in results:
                cand = np.vstack((cand, res.get(1024)))
            pool.close()

            # for i in xrange(0, cand2.shape[0]):
            #    log("Optimizing candidate %d/%d\n" %
            #                     (i+1, cand2.shape[0]))
            #    self.check_grad_ei(cand2[i,:], comp, pend, vals, labels)
            #    ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
            #                            cand2[i,:].flatten(),
            #                            args=(comp,pend,vals,labels,True),
            #                            bounds=b, disp=0)
            #    cand2[i,:] = ret[0]

            # (Leftover from the commented-out serial path above; the pool
            # loop has already appended the optimized points to cand.)
            # cand = np.vstack((cand, cand2))

            overall_ei = self.ei_over_hypers(comp, pend, cand, vals, labels)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))

            self.dump_hypers()
            if best_cand >= numcand:
                return (int(numcand), cand[best_cand, :])

            return int(candidates[best_cand])

        else:
            print("This Chooser module permits only slice sampling with > 0 " "samples.")
            raise Exception("mcmc_iters <= 0")

    # Predict constraint violating points
    def pred_constraint_voilation(self, cand, comp, vals):
        # The primary covariances for prediction.
        comp_cov = self.cov(self.constraint_amp2, self.constraint_ls, comp)
        cand_cross = self.cov(self.constraint_amp2, self.constraint_ls, comp, cand)

        # Compute the required Cholesky.
        obsv_cov = comp_cov + self.constraint_noise * np.eye(comp.shape[0])
        obsv_chol = spla.cholesky(obsv_cov, lower=True)

        cov_grad_func = getattr(gp, "grad_" + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.constraint_ls, comp, cand)

        # Predictive things.
        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True), self.ff)
        beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha)  # + self.constraint_mean
        func_m = sps.norm.cdf(func_m * self.constraint_gain)

        return func_m
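
    # Note: self.ff is a sample of the latent GP at the observed points, so
    # the prediction above is the latent predictive mean squashed through a
    # probit, p(x) = Phi(gain * k(x, X) K^{-1} ff); the latent predictive
    # variance is not used.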

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self, comp, pend, cand, vals, labels):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in range(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            constraint_hyper = self.constraint_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.constraint_mean = constraint_hyper[0]
            self.constraint_gain = constraint_hyper[1]
            self.constraint_amp2 = constraint_hyper[2]
            self.constraint_ls = constraint_hyper[3]
            overall_ei[:, mcmc_iter] = self.compute_constrained_ei(comp, pend, cand, vals, labels)

        return overall_ei
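
    # Hypothetical usage: integrate the acquisition over the hyperparameter
    # samples by averaging over columns, as next() does above, e.g.
    #   acq  = np.mean(self.ei_over_hypers(comp, pend, cand, vals, labels), axis=1)
    #   pick = cand[np.argmax(acq)]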

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self, cand, comp, pend, vals, labels, compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()

        for mcmc_iter in range(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            constraint_hyper = self.constraint_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.constraint_mean = constraint_hyper[0]
            self.constraint_gain = constraint_hyper[1]
            self.constraint_amp2 = constraint_hyper[2]
            self.constraint_ls = constraint_hyper[3]
            if compute_grad:
                (ei, g_ei) = self.grad_optimize_ei(cand, comp, pend, vals, labels, compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand, comp, pend, vals, labels, compute_grad)

            summed_ei += ei

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    def check_grad_ei(self, cand, comp, pend, vals, labels):
        (ei, dx1) = self.grad_optimize_ei_over_hypers(cand, comp, pend, vals, labels)
        dx2 = dx1 * 0
        idx = np.zeros(cand.shape[0])
        for i in range(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1, tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, pend, vals, labels)
            (ei2, tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, pend, vals, labels)
            dx2[i] = (ei1 - ei2) / (2 * 1e-6)
            idx[i] = 0
        print("computed grads", dx1)
        print("finite diffs", dx2)
        print((dx1 / dx2))
        print(np.sum((dx1 - dx2) ** 2))
        time.sleep(2)

    def grad_optimize_ei(self, cand, comp, pend, vals, labels, compute_grad=True):
        if pend.shape[0] == 0:
            return self.grad_optimize_ei_nopend(cand, comp, vals, labels, compute_grad=compute_grad)
        else:
            return self.grad_optimize_ei_pend(cand, comp, pend, vals, labels, compute_grad=compute_grad)

    def grad_optimize_ei_pend(self, cand, comp, pend, vals, labels, compute_grad=True):
        # Here we have to compute the gradients for constrained ei.
        # This means differentiating through two kernels: the one predicting
        # constraint violations and the one predicting ei.

        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Use standard EI if there aren't enough observations of either
        # positive or negative constraint violations
        use_vanilla_ei = np.all(labels > 0) or np.all(labels <= 0)

        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))
        func_constraint_m = 1

        if not use_vanilla_ei:

            # First we make predictions for the constraint function
            # Compute covariances
            comp_constraint_cov = self.cov(self.constraint_amp2, self.constraint_ls, compfull)
            cand_constraint_cross = self.cov(self.constraint_amp2, self.constraint_ls, compfull, cand)

            # Cholesky decompositions
            obsv_constraint_cov = comp_constraint_cov + self.constraint_noise * np.eye(compfull.shape[0])
            obsv_constraint_chol = spla.cholesky(obsv_constraint_cov, lower=True)

            # Linear systems
            t_alpha = spla.cho_solve((obsv_constraint_chol, True), self.ff)

            # Predict the constraint's marginal means (variances unused here)
            ff = np.dot(cand_constraint_cross.T, t_alpha)

            # Squash through Gaussian cdf
            func_constraint_m = sps.norm.cdf(self.constraint_gain * ff)

            # Apply covariance function
            cov_grad_func = getattr(gp, "grad_" + self.cov_func.__name__)
            cand_cross_grad = cov_grad_func(self.constraint_ls, compfull, cand)
            grad_cross_t = np.squeeze(cand_cross_grad)

        # Now compute the gradients w.r.t. ei
        # The primary covariances for prediction.
        comp_cov = self.cov(self.amp2, self.ls, comp)
        cand_cross = self.cov(self.amp2, self.ls, comp, cand)
        comp_cov_full = self.cov(self.amp2, self.ls, compfull)
        cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

        # Create a composite vector of complete and pending.
        comp_pend = np.concatenate((comp, pend))

        # Compute the covariance and Cholesky decomposition.
        comp_pend_cov = self.cov(self.amp2, self.ls, comp_pend) + self.noise * np.eye(comp_pend.shape[0])
        comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

        # Compute submatrices.
        pend_cross = self.cov(self.amp2, self.ls, comp, pend)
        pend_kappa = self.cov(self.amp2, self.ls, pend)

        # Use the sub-Cholesky.
        obsv_chol = comp_pend_chol[: comp.shape[0], : comp.shape[0]]

        # Compute the required Cholesky.
        # obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
        # obsv_chol = spla.cholesky(obsv_cov, lower=True)
        obsv_cov_full = comp_cov_full + self.noise * np.eye(compfull.shape[0])
        obsv_chol_full = spla.cholesky(obsv_cov_full, lower=True)

        # Predictive things.
        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
        beta = spla.cho_solve((obsv_chol, True), pend_cross)

        # Finding predictive means and variances.
        pend_m = np.dot(pend_cross.T, alpha) + self.mean
        pend_K = pend_kappa - np.dot(pend_cross.T, beta)

        # Take the Cholesky of the predictive covariance.
        pend_chol = spla.cholesky(pend_K, lower=True)

        # Make predictions.
        npr.set_state(self.randomstate)
        pend_fant = np.dot(pend_chol, npr.randn(pend.shape[0], self.pending_samples)) + pend_m[:, None]

        # Include the fantasies.
        fant_vals = np.concatenate((np.tile(vals[:, np.newaxis], (1, self.pending_samples)), pend_fant))

        # Compute bests over the fantasies.
        bests = np.min(fant_vals, axis=0)

        # Now generalize from these fantasies.
        cand_cross = self.cov(self.amp2, self.ls, comp_pend, cand)
        cov_grad_func = getattr(gp, "grad_" + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.ls, comp_pend, cand)

        # Solve the linear systems.
        alpha = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean)
        beta = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2 * (1 + 1e-6) - np.sum(beta ** 2, axis=0)

        # Expected improvement over the fantasized incumbents
        func_s = np.sqrt(func_v[:, np.newaxis])
        u = (bests[np.newaxis, :] - func_m) / func_s
        ncdf = sps.norm.cdf(u)
        npdf = sps.norm.pdf(u)
        ei = func_s * (u * ncdf + npdf)

        constrained_ei = -np.sum(np.mean(ei, axis=1) * func_constraint_m)
        if not compute_grad:
            return constrained_ei

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5 * npdf / func_s

        # Apply covariance function
        grad_cross = np.squeeze(cand_cross_grad)

        grad_xp_m = np.dot(alpha.transpose(), grad_cross)
        grad_xp_v = np.dot(-2 * spla.cho_solve((comp_pend_chol, True), cand_cross).transpose(), grad_cross)

        grad_xp = 0.5 * self.amp2 * (grad_xp_m * np.tile(g_ei_m, (comp.shape[1], 1)).T + (grad_xp_v.T * g_ei_s2).T)

        grad_xp = np.mean(grad_xp, axis=0)

        if use_vanilla_ei:
            return -np.sum(np.mean(ei, axis=1)), grad_xp.flatten()

        grad_constraint_xp_m = np.dot(t_alpha.transpose(), grad_cross_t)
        grad_constraint_xp_m = (
            0.5
            * self.constraint_amp2
            * self.constraint_gain
            * grad_constraint_xp_m
            * sps.norm.pdf(self.constraint_gain * ff)
        )

        grad_xp = func_constraint_m * grad_xp + np.sum(np.mean(ei, axis=1)) * grad_constraint_xp_m

        return constrained_ei, grad_xp.flatten()

    def grad_optimize_ei_nopend(self, cand, comp, vals, labels, compute_grad=True):
        # Here we have to compute the gradients for constrained ei.
        # This means differentiating through two kernels: the one predicting
        # constraint violations and the one predicting ei.

        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Use standard EI if there aren't enough observations of either
        # positive or negative constraint violations
        use_vanilla_ei = np.all(labels > 0) or np.all(labels <= 0)

        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))
        func_constraint_m = 1

        if not use_vanilla_ei:

            # First we make predictions for the constraint function
            # Compute covariances
            comp_constraint_cov = self.cov(self.constraint_amp2, self.constraint_ls, compfull)
            cand_constraint_cross = self.cov(self.constraint_amp2, self.constraint_ls, compfull, cand)

            # Cholesky decompositions
            obsv_constraint_cov = comp_constraint_cov + self.constraint_noise * np.eye(compfull.shape[0])
            obsv_constraint_chol = spla.cholesky(obsv_constraint_cov, lower=True)

            # Linear systems
            t_alpha = spla.cho_solve((obsv_constraint_chol, True), self.ff)

            # Predict the constraint's marginal means (variances unused here)
            ff = np.dot(cand_constraint_cross.T, t_alpha)

            # Squash through Gaussian cdf
            func_constraint_m = sps.norm.cdf(self.constraint_gain * ff)

        # Now compute the gradients w.r.t. ei
        # The primary covariances for prediction.
        comp_cov = self.cov(self.amp2, self.ls, comp)
        cand_cross = self.cov(self.amp2, self.ls, comp, cand)
        comp_cov_full = self.cov(self.amp2, self.ls, compfull)
        cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

        # Compute the required Cholesky.
        obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
        obsv_chol = spla.cholesky(obsv_cov, lower=True)
        obsv_cov_full = comp_cov_full + self.noise * np.eye(compfull.shape[0])
        obsv_chol_full = spla.cholesky(obsv_cov_full, lower=True)

        # Predictive things.
        # Solve the linear systems.
        alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
        beta = spla.solve_triangular(obsv_chol_full, cand_cross_full, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2 * (1 + 1e-6) - np.sum(beta ** 2, axis=0)

        # Expected improvement
        func_s = np.sqrt(func_v)
        u = (best - func_m) / func_s
        ncdf = sps.norm.cdf(u)
        npdf = sps.norm.pdf(u)
        ei = func_s * (u * ncdf + npdf)

        constrained_ei = -np.sum(ei * func_constraint_m)
        if not compute_grad:
            return constrained_ei

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5 * npdf / func_s

        # Apply covariance function
        cov_grad_func = getattr(gp, "grad_" + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.ls, comp, cand)
        grad_cross = np.squeeze(cand_cross_grad)

        cand_cross_grad_full = cov_grad_func(self.ls, compfull, cand)
        grad_cross_full = np.squeeze(cand_cross_grad_full)

        grad_xp_m = np.dot(alpha.transpose(), grad_cross)
        grad_xp_v = np.dot(-2 * spla.cho_solve((obsv_chol_full, True), cand_cross_full).transpose(), grad_cross_full)

        grad_xp = 0.5 * self.amp2 * (grad_xp_m * g_ei_m + grad_xp_v * g_ei_s2)

        if use_vanilla_ei:
            return -np.sum(ei), grad_xp.flatten()

        # Apply constraint classifier
        cand_cross_grad = cov_grad_func(self.constraint_ls, compfull, cand)
        grad_cross_t = np.squeeze(cand_cross_grad)

        grad_constraint_xp_m = np.dot(t_alpha.transpose(), grad_cross_t)
        grad_constraint_xp_m = (
            0.5
            * self.constraint_amp2
            * self.constraint_gain
            * grad_constraint_xp_m
            * sps.norm.pdf(self.constraint_gain * ff)
        )

        grad_xp = func_constraint_m * grad_xp + ei * grad_constraint_xp_m

        return constrained_ei, grad_xp.flatten()

    def compute_constrained_ei(self, comp, pend, cand, vals, labels):
        # First we make predictions for the constraint function, as that
        # doesn't depend on pending experiments.
        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Use standard EI if there aren't enough observations of either
        # positive or negative constraint violations
        if np.all(labels > 0) or np.all(labels <= 0):
            func_constraint_m = 1
        else:
            # Compute covariances
            comp_constraint_cov = self.cov(self.constraint_amp2, self.constraint_ls, compfull)
            cand_constraint_cross = self.cov(self.constraint_amp2, self.constraint_ls, compfull, cand)

            # Cholesky decompositions
            obsv_constraint_cov = comp_constraint_cov + self.constraint_noise * np.eye(compfull.shape[0])
            obsv_constraint_chol = spla.cholesky(obsv_constraint_cov, lower=True)

            # Linear systems
            t_alpha = spla.cho_solve((obsv_constraint_chol, True), self.ff)
            t_beta = spla.solve_triangular(obsv_constraint_chol, cand_constraint_cross, lower=True)

            # Predict the constraint's marginal means (variances unused here)
            func_constraint_m = np.dot(cand_constraint_cross.T, t_alpha)

        # Squash through a probit
        func_constraint_m = sps.norm.cdf(self.constraint_gain * func_constraint_m)
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.
            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(self.amp2, self.ls, comp)
            comp_cov_full = self.cov(self.amp2, self.ls, compfull)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)
            cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_cov_full = comp_cov_full + self.noise * np.eye(compfull.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)
            obsv_chol_full = spla.cholesky(obsv_cov_full, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
            # beta   = spla.solve_triangular(obsv_chol_full, cand_cross_full,
            #                               lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta ** 2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            constrained_ei = ei * func_constraint_m
            return constrained_ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = self.cov(self.amp2, self.ls, comp_pend) + self.noise * np.eye(comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(self.amp2, self.ls, comp, pend)
            pend_kappa = self.cov(self.amp2, self.ls, pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[: comp.shape[0], : comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = np.dot(pend_chol, npr.randn(pend.shape[0], self.pending_samples)) + pend_m[:, None]

            # Include the fantasies.
            fant_vals = np.concatenate((np.tile(vals[:, np.newaxis], (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(self.amp2, self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta ** 2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return np.mean(ei, axis=1) * func_constraint_m

    def compute_ei(self, comp, pend, cand, vals, labels):
        # First we make predictions for the constraint function, as that
        # doesn't depend on pending experiments.
        # First pull out violating points
        compfull = comp.copy()
        comp = comp[labels > 0, :]
        vals = vals[labels > 0]

        # Compute covariances
        comp_constraint_cov = self.cov(self.constraint_amp2, self.constraint_ls, compfull)
        cand_constraint_cross = self.cov(self.constraint_amp2, self.constraint_ls, compfull, cand)

        # Cholesky decompositions
        obsv_constraint_cov = comp_constraint_cov + self.constraint_noise * np.eye(compfull.shape[0])
        obsv_constraint_chol = spla.cholesky(obsv_constraint_cov, lower=True)

        # Linear systems
        t_alpha = spla.cho_solve((obsv_constraint_chol, True), self.ff)

        # Predict the constraint's marginal means (variances unused here)
        func_constraint_m = np.dot(cand_constraint_cross.T, t_alpha)

        # Squash through a logistic sigmoid to get prob of not violating a constraint
        func_constraint_m = 1.0 / (1 + np.exp(-self.constraint_gain * func_constraint_m))

        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(self.amp2, self.ls, comp)
            comp_cov_full = self.cov(self.amp2, self.ls, compfull)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)
            cand_cross_full = self.cov(self.amp2, self.ls, compfull, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_cov_full = comp_cov_full + self.noise * np.eye(compfull.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)
            obsv_chol_full = spla.cholesky(obsv_cov_full, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)
            # beta   = spla.solve_triangular(obsv_chol_full, cand_cross_full,
            # lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta ** 2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return ei
        else:
            # Pending case is not implemented for this plain-EI helper.
            return 0

    def sample_constraint_hypers(self, comp, labels):
        # The latent GP projection
        if self.ff is None or self.ff.shape[0] < comp.shape[0]:
            self.ff_samples = []
            comp_cov = self.cov(self.constraint_amp2, self.constraint_ls, comp)
            obsv_cov = comp_cov + 1e-6 * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)
            self.ff = np.dot(obsv_chol, npr.randn(obsv_chol.shape[0]))

        self._sample_constraint_noisy(comp, labels)
        self._sample_constraint_ls(comp, labels)
        self.constraint_hyper_samples.append(
            (self.constraint_mean, self.constraint_gain, self.constraint_amp2, self.constraint_ls)
        )
        self.ff_samples.append(self.ff)

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)

        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov = self.amp2 * (self.cov_func(ls, comp, None) + 1e-6 * np.eye(comp.shape[0])) + self.noise * np.eye(
                comp.shape[0]
            )
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(vals - self.mean, solve)
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_constraint_ls(self, comp, vals):
        def lpProbit(ff, gain=self.constraint_gain):
            probs = sps.norm.cdf(ff * gain)
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1 - 1e-12
            llh = np.sum(vals * np.log(probs) + (1 - vals) * np.log(1 - probs))

            return llh

        def lpSigmoid(ff, gain=self.constraint_gain):
            probs = 1.0 / (1.0 + np.exp(-gain * ff))
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1 - 1e-12
            llh = np.sum(vals * np.log(probs) + (1 - vals) * np.log(1 - probs))
            return llh

        def updateGain(gain):
            if gain < 0.01 or gain > 10:
                return -np.inf

            cov = self.constraint_amp2 * (
                self.cov_func(self.constraint_ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
            ) + self.constraint_noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals)
            lp = lpProbit(self.ff, gain)

            return lp

        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.constraint_max_ls):
                return -np.inf

            cov = self.constraint_amp2 * (
                self.cov_func(ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
            ) + self.constraint_noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), self.ff)
            lp = lpProbit(self.ff)

            return lp

        hypers = util.slice_sample(self.constraint_ls, logprob, compwise=True)
        self.constraint_ls = hypers

        cov = self.constraint_amp2 * (
            self.cov_func(self.constraint_ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
        ) + self.constraint_noise * np.eye(comp.shape[0])
        chol = spla.cholesky(cov, lower=False)
        ff = self.ff
        for jj in range(20):
            (ff, lpell) = self.elliptical_slice(ff, chol, lpProbit)

        self.ff = ff

        # Update gain
        hypers = util.slice_sample(np.array([self.constraint_gain]), updateGain, compwise=True)
        self.constraint_gain = hypers[0]

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov = amp2 * (
                self.cov_func(self.ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
            ) + noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(vals - mean, solve)

            # Roll in noise horseshoe prior.
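            # The horseshoe density has no closed form, but up to a constant it
            # is approximated well by log(1 + (scale/noise)^2); the outer log
            # below therefore gives (approximately) its log density.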
            lp += np.log(np.log(1 + (self.noise_scale / noise) ** 2))

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale) ** 2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = hypers[2]

    def _sample_constraint_noisy(self, comp, vals):
        def lpProbit(ff, gain=self.constraint_gain):
            probs = sps.norm.cdf(ff * gain)
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1 - 1e-12
            if np.any(np.isnan(probs)):
                print(probs)
            llh = np.sum(vals * np.log(probs) + (1 - vals) * np.log(1 - probs))
            return llh

        def lpSigmoid(ff, gain=self.constraint_gain):
            probs = 1.0 / (1.0 + np.exp(-gain * ff))
            probs[probs <= 0] = 1e-12
            probs[probs >= 1] = 1 - 1e-12
            llh = np.sum(vals * np.log(probs) + (1 - vals) * np.log(1 - probs))
            return llh

        def logprob(hypers):
            amp2 = hypers[0]
            ff = hypers[1:]

            if amp2 < 0:
                return -np.inf

            noise = self.constraint_noise
            cov = amp2 * (
                self.cov_func(self.constraint_ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
            ) + noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), ff)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(ff, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.constraint_amp2_scale) ** 2

            lp += lpProbit(ff, self.constraint_gain)

            return lp

        hypers = util.slice_sample(np.hstack((np.array([self.constraint_amp2]), self.ff)), logprob, compwise=False)
        self.constraint_amp2 = hypers[0]
        self.ff = hypers[1:]
        cov = self.constraint_amp2 * (
            self.cov_func(self.constraint_ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
        ) + self.constraint_noise * np.eye(comp.shape[0])
        chol = spla.cholesky(cov, lower=False)
        ff = self.ff
        for jj in range(50):
            (ff, lpell) = self.elliptical_slice(ff, chol, lpProbit)
        self.ff = ff

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf

            cov = amp2 * (
                self.cov_func(self.ls, comp, None) + 1e-6 * np.eye(comp.shape[0])
            ) + noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(vals - mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale) ** 2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = 1e-3

    def elliptical_slice(self, xx, chol_Sigma, log_like_fn, cur_log_like=None, angle_range=0):
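        # Elliptical slice sampling (Murray, Adams & MacKay, 2010): proposes
        # points on an ellipse through the current state xx and a prior draw
        # nu ~ N(0, Sigma), shrinking the angle bracket until a proposal
        # clears a likelihood threshold. Note that nu is built from
        # chol_Sigma.T, so callers pass the *upper*-triangular factor
        # (spla.cholesky(..., lower=False)).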
        D = xx.shape[0]

        if cur_log_like is None:
            cur_log_like = log_like_fn(xx)

        nu = np.dot(chol_Sigma.T, np.random.randn(D, 1)).flatten()
        hh = np.log(np.random.rand()) + cur_log_like

        # Set up a bracket of angles and pick a first proposal.
        # "phi = (theta'-theta)" is a change in angle.
        if angle_range <= 0:
            # Bracket whole ellipse with both edges at first proposed point
            phi = np.random.rand() * 2 * math.pi
            phi_min = phi - 2 * math.pi
            phi_max = phi
        else:
            # Randomly center bracket on current point
            phi_min = -angle_range * np.random.rand()
            phi_max = phi_min + angle_range
            phi = np.random.rand() * (phi_max - phi_min) + phi_min

        # Slice sampling loop
        while True:
            # Compute xx for proposed angle difference
            # and check if it's on the slice
            xx_prop = xx * np.cos(phi) + nu * np.sin(phi)

            cur_log_like = log_like_fn(xx_prop)
            if cur_log_like > hh:
                # New point is on slice, ** EXIT LOOP **
                break

            # Shrink slice to rejected point
            if phi > 0:
                phi_max = phi
            elif phi < 0:
                phi_min = phi
            else:
                raise Exception("BUG DETECTED: Shrunk to current position "
                                "and still not acceptable.")

            # Propose new angle difference
            phi = np.random.rand() * (phi_max - phi_min) + phi_min

        xx = xx_prop
        return (xx, cur_log_like)
Esempio n. 31
class GPEIperSecChooser:

    def __init__(self, expt_dir, covar="Matern52", mcmc_iters=10,
                 pending_samples=100, noiseless=False, burnin=100,
                 grid_subset=20):
        self.cov_func        = getattr(gp, covar)
        self.locker          = Locker()
        self.state_pkl       = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.stats_file      = os.path.join(expt_dir,
                                   self.__module__ + "_hyperparameters.txt")
        self.mcmc_iters      = int(mcmc_iters)
        self.burnin          = int(burnin)
        self.needs_burnin    = True
        self.pending_samples = int(pending_samples)
        self.D               = -1
        self.hyper_iters     = 1
        # Number of points to optimize EI over
        self.grid_subset     = int(grid_subset)
        self.noiseless       = bool(int(noiseless))
        self.hyper_samples = []
        self.time_hyper_samples = []

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale  = 1    # zero-mean log normal prior
        self.max_ls      = 10    # top-hat prior on length scales

        self.time_noise_scale = 0.1  # horseshoe prior
        self.time_amp2_scale  = 1    # zero-mean log normal prior
        self.time_max_ls      = 10   # top-hat prior on length scales

    # A simple function to dump out hyperparameters to allow for a hot start
    # if the optimization is restarted.
    def dump_hypers(self):
        self.locker.lock_wait(self.state_pkl)

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'dims'        : self.D,
                       'ls'          : self.ls,
                       'amp2'        : self.amp2,
                       'noise'       : self.noise,
                       'mean'        : self.mean,
                       'time_ls'     : self.time_ls,
                       'time_amp2'   : self.time_amp2,
                       'time_noise'  : self.time_noise,
                       'time_mean'   : self.time_mean },
                     fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd) # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

    def _real_init(self, dims, values, durations):
        self.locker.lock_wait(self.state_pkl)

        if os.path.exists(self.state_pkl):
            fh    = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D          = state['dims']
            self.ls         = state['ls']
            self.amp2       = state['amp2']
            self.noise      = state['noise']
            self.mean       = state['mean']
            self.time_ls    = state['time_ls']
            self.time_amp2  = state['time_amp2']
            self.time_noise = state['time_noise']
            self.time_mean  = state['time_mean']
        else:

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)
            self.time_ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values)+1e-4
            self.time_amp2 = np.std(durations)+1e-4

            # Initial observation noise.
            self.noise = 1e-3
            self.time_noise = 1e-3

            # Initial mean.
            self.mean = np.mean(values)
            self.time_mean = np.mean(np.log(durations))

        self.locker.unlock(self.state_pkl)

    def cov(self, amp2, ls, x1, x2=None):
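        # The 1e-6 jitter on the diagonal keeps the kernel matrix numerically
        # positive definite for the Cholesky factorizations downstream.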
        if x2 is None:
            return amp2 * (self.cov_func(ls, x1, None)
                           + 1e-6*np.eye(x1.shape[0]))
        else:
            return amp2 * self.cov_func(ls, x1, x2)

    # Given a set of completed 'experiments' in the unit hypercube with
    # corresponding objective 'values' and 'durations', pick the next
    # experiment to run according to the EI-per-second acquisition function.
    def next(self, grid, values, durations,
             candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete],
                            durations[complete])

        # Grab out the relevant sets.
        comp = grid[complete,:]
        cand = grid[candidates,:]
        pend = grid[pending,:]
        vals = values[complete]
        durs = durations[complete]

        # Bring time into the log domain before we do anything
        # to maintain strict positivity
        durs = np.log(durs)

        # Spray a set of candidates around the min so far
        numcand = cand.shape[0]
        best_comp = np.argmin(vals)
        cand2 = np.vstack((np.random.randn(10,comp.shape[1])*0.001 +
                           comp[best_comp,:], cand))

        if self.mcmc_iters > 0:

            # Possibly burn in.
            if self.needs_burnin:
                for mcmc_iter in range(self.burnin):
                    self.sample_hypers(comp, vals, durs)
                    log("BURN %d/%d] mean: %.2f  amp: %.2f "
                                     "noise: %.4f  min_ls: %.4f  max_ls: %.4f"
                                     % (mcmc_iter+1, self.burnin, self.mean,
                                        np.sqrt(self.amp2), self.noise,
                                        np.min(self.ls), np.max(self.ls)))
                self.needs_burnin = False

            # Sample from hyperparameters.
            # Adjust the candidates to hit ei/sec peaks
            self.hyper_samples = []
            for mcmc_iter in range(self.mcmc_iters):
                self.sample_hypers(comp, vals, durs)
                log("%d/%d] mean: %.2f  amp: %.2f  noise: %.4f "
                                 "min_ls: %.4f  max_ls: %.4f"
                                 % (mcmc_iter+1, self.mcmc_iters, self.mean,
                                    np.sqrt(self.amp2), self.noise,
                                    np.min(self.ls), np.max(self.ls)))

                log("%d/%d] time_mean: %.2fs time_amp: %.2f  time_noise: %.4f "
                                 "time_min_ls: %.4f  time_max_ls: %.4f"
                                 % (mcmc_iter+1, self.mcmc_iters, np.exp(self.time_mean),
                                    np.sqrt(self.time_amp2), np.exp(self.time_noise),
                                    np.min(self.time_ls), np.max(self.time_ls)))
            self.dump_hypers()

            # Pick the top candidates to optimize over
            overall_ei = self.ei_over_hypers(comp,pend,cand2,vals,durs)
            inds = np.argsort(np.mean(overall_ei, axis=1))[-self.grid_subset:]
            cand2 = cand2[inds,:]

            # Adjust the candidates to hit ei peaks
            b = []  # optimization bounds
            for i in range(0, cand.shape[1]):
                b.append((0, 1))

            for i in range(0, cand2.shape[0]):
                log("Optimizing candidate %d/%d" %
                                 (i+1, cand2.shape[0]))
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei_over_hypers,
                                        cand2[i,:].flatten(),
                                        args=(comp,vals,durs,True),
                                        bounds=b, disp=0)
                cand2[i,:] = ret[0]

            cand = np.vstack((cand, cand2))

            overall_ei = self.ei_over_hypers(comp,pend,cand,vals,durs)
            best_cand = np.argmax(np.mean(overall_ei, axis=1))
            self.dump_hypers()
            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            self.optimize_hypers(comp, vals, durs)

            log("mean: %f  amp: %f  noise: %f "
                             "min_ls: %f  max_ls: %f"
                             % (self.mean, np.sqrt(self.amp2),
                                self.noise, np.min(self.ls), np.max(self.ls)))

            # Pick the top candidates to optimize over
            ei = self.compute_ei_per_s(comp, pend, cand2, vals, durs)
            inds = np.argsort(ei)[-self.grid_subset:]
            cand2 = cand2[inds,:]

            # Adjust the candidates to hit ei peaks
            b = []  # optimization bounds
            for i in range(0, cand.shape[1]):
                b.append((0, 1))

            for i in range(0, cand2.shape[0]):
                log("Optimizing candidate %d/%d" %
                                 (i+1, cand2.shape[0]))
                ret = spo.fmin_l_bfgs_b(self.grad_optimize_ei,
                                        cand2[i,:].flatten(),
                                        args=(comp,vals,durs,True),
                                        bounds=b, disp=0)
                cand2[i,:] = ret[0]

            cand = np.vstack((cand, cand2))
            ei = self.compute_ei_per_s(comp, pend, cand, vals, durs)

            best_cand = np.argmax(ei)
            self.dump_hypers()

            if (best_cand >= numcand):
                return (int(numcand), cand[best_cand,:])

            return int(candidates[best_cand])

    # Compute EI over hyperparameter samples
    def ei_over_hypers(self,comp,pend,cand,vals,durs):
        overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))
        for mcmc_iter in range(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            time_hyper = self.time_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.time_mean = time_hyper[0]
            self.time_noise = time_hyper[1]
            self.time_amp2 = time_hyper[2]
            self.time_ls = time_hyper[3]

            overall_ei[:,mcmc_iter] = self.compute_ei_per_s(comp, pend, cand,
                                                            vals, durs.squeeze())

        return overall_ei

    def check_grad_ei_per(self, cand, comp, vals, durs):
        (ei,dx1) = self.grad_optimize_ei_over_hypers(cand, comp, vals, durs)
        dx2 = dx1*0
        idx = np.zeros(cand.shape[0])
        for i in range(0, cand.shape[0]):
            idx[i] = 1e-6
            (ei1,tmp) = self.grad_optimize_ei_over_hypers(cand + idx, comp, vals, durs)
            (ei2,tmp) = self.grad_optimize_ei_over_hypers(cand - idx, comp, vals, durs)
            dx2[i] = (ei1 - ei2)/(2*1e-6)
            idx[i] = 0
        print('computed grads', dx1)
        print('finite diffs', dx2)
        print(dx1/dx2)
        print(np.sum((dx1 - dx2)**2))
        time.sleep(2)

    # Adjust points by optimizing EI over a set of hyperparameter samples
    def grad_optimize_ei_over_hypers(self, cand, comp, vals, durs, compute_grad=True):
        summed_ei = 0
        summed_grad_ei = np.zeros(cand.shape).flatten()

        for mcmc_iter in range(self.mcmc_iters):
            hyper = self.hyper_samples[mcmc_iter]
            time_hyper = self.time_hyper_samples[mcmc_iter]
            self.mean = hyper[0]
            self.noise = hyper[1]
            self.amp2 = hyper[2]
            self.ls = hyper[3]

            self.time_mean = time_hyper[0]
            self.time_noise = time_hyper[1]
            self.time_amp2 = time_hyper[2]
            self.time_ls = time_hyper[3]

            if compute_grad:
                (ei,g_ei) = self.grad_optimize_ei(cand,comp,vals,durs,compute_grad)
                summed_grad_ei = summed_grad_ei + g_ei
            else:
                ei = self.grad_optimize_ei(cand,comp,vals,durs,compute_grad)

            summed_ei += ei

        if compute_grad:
            return (summed_ei, summed_grad_ei)
        else:
            return summed_ei

    def grad_optimize_ei(self, cand, comp, vals, durs, compute_grad=True):
        # Here we have to compute the gradients for ei per second
        # This means deriving through the two kernels, the one for predicting
        # time and the one predicting ei
        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))

        # First we make predictions for the durations
        # Compute covariances
        comp_time_cov   = self.cov(self.time_amp2, self.time_ls, comp)
        cand_time_cross = self.cov(self.time_amp2, self.time_ls,comp,cand)

        # Cholesky decompositions
        obsv_time_cov  = comp_time_cov + self.time_noise*np.eye(comp.shape[0])
        obsv_time_chol = spla.cholesky( obsv_time_cov, lower=True )

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_time_chol, True), durs - self.time_mean)

        # Predict marginal mean times and (possibly) variances
        func_time_m = np.dot(cand_time_cross.T, t_alpha) + self.time_mean

        # We don't really need the time variances now
        #func_time_v = self.time_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Bring time out of the log domain
        func_time_m = np.exp(func_time_m)

        # Compute derivative of cross-distances.
        grad_cross_r = gp.grad_dist2(self.time_ls, comp, cand)

        # Apply covariance function
        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.time_ls, comp, cand)
        grad_cross_t = np.squeeze(cand_cross_grad)

        # Now compute the gradients w.r.t. ei
        # The primary covariances for prediction.
        comp_cov   = self.cov(self.amp2, self.ls, comp)
        cand_cross = self.cov(self.amp2, self.ls, comp, cand)

        # Compute the required Cholesky.
        obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
        obsv_chol = spla.cholesky( obsv_cov, lower=True )

        cand_cross_grad = cov_grad_func(self.ls, comp, cand)

        # Predictive things.
        # Solve the linear systems.
        alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
        beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

        # Expected improvement
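        # Closed form under a Gaussian posterior:
        #   EI(x) = s(x) * (u * Phi(u) + phi(u)),  u = (best - m(x)) / s(x)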
        func_s = np.sqrt(func_v)
        u      = (best - func_m) / func_s
        ncdf   = sps.norm.cdf(u)
        npdf   = sps.norm.pdf(u)
        ei     = func_s*(u*ncdf + npdf)

        ei_per_s = -np.sum(ei/func_time_m)
        if not compute_grad:
            return ei_per_s

        grad_time_xp_m = np.dot(t_alpha.transpose(),grad_cross_t)

        # Gradients of ei w.r.t. mean and variance
        g_ei_m = -ncdf
        g_ei_s2 = 0.5*npdf / func_s

        # Apply covariance function
        grad_cross = np.squeeze(cand_cross_grad)

        grad_xp_m = np.dot(alpha.transpose(),grad_cross)
        grad_xp_v = np.dot(-2*spla.cho_solve((obsv_chol, True),
                                             cand_cross).transpose(),grad_cross)

        grad_xp = 0.5*self.amp2*(grad_xp_m*g_ei_m + grad_xp_v*g_ei_s2)
        grad_time_xp_m = 0.5*self.time_amp2*grad_time_xp_m*func_time_m
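        # Quotient rule for EI per second: d(EI/t) = (t * dEI - EI * dt) / t^2.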
        grad_xp = (func_time_m*grad_xp - ei*grad_time_xp_m)/(func_time_m**2)

        return ei_per_s, grad_xp.flatten()

    def compute_ei_per_s(self, comp, pend, cand, vals, durs):
        # First we make predictions for the durations as that
        # doesn't depend on pending experiments

        # Compute covariances
        comp_time_cov   = self.cov(self.time_amp2, self.time_ls, comp)
        cand_time_cross = self.cov(self.time_amp2, self.time_ls,comp,cand)

        # Cholesky decompositions
        obsv_time_cov  = comp_time_cov + self.time_noise*np.eye(comp.shape[0])
        obsv_time_chol = spla.cholesky( obsv_time_cov, lower=True )

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_time_chol, True), durs - self.time_mean)
        #t_beta   = spla.solve_triangular(obsv_time_chol, cand_time_cross, lower=True)

        # Predict marginal mean times and (possibly) variances
        func_time_m = np.dot(cand_time_cross.T, t_alpha) + self.time_mean
        # We don't really need the time variances now
        #func_time_v = self.time_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Bring time out of the log domain
        func_time_m = np.exp(func_time_m)

        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov   = self.cov(self.amp2, self.ls, comp)
            cand_cross = self.cov(self.amp2, self.ls, comp, cand)

            # Compute the required Cholesky.
            obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
            obsv_chol = spla.cholesky( obsv_cov, lower=True )

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u      = (best - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            ei_per_s = ei/func_time_m
            return ei_per_s
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov  = self.cov(self.amp2, self.ls, comp_pend) + self.noise*np.eye(comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(self.amp2, self.ls, comp, pend)
            pend_kappa = self.cov(self.amp2, self.ls, pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0],:comp.shape[0]]

            # Solve the linear systems.
            alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta   = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
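            # Standard Gaussian conditioning: pend_m = K_pc K_cc^{-1} (y - m) + m,
            # pend_K = K_pp - K_pc K_cc^{-1} K_cp.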
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = np.dot(pend_chol, npr.randn(pend.shape[0],self.pending_samples)) + pend_m[:,None]

            # Include the fantasies.
            fant_vals = np.concatenate((np.tile(vals[:,np.newaxis],
                                                (1,self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(self.amp2, self.ls, comp_pend, cand)

            # Solve the linear systems.
            alpha  = spla.cho_solve((comp_pend_chol, True), fant_vals - self.mean)
            beta   = spla.solve_triangular(comp_pend_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:,np.newaxis])
            u      = (bests[np.newaxis,:] - func_m) / func_s
            ncdf   = sps.norm.cdf(u)
            npdf   = sps.norm.pdf(u)
            ei     = func_s*( u*ncdf + npdf)

            return np.divide(np.mean(ei, axis=1), func_time_m)

    def sample_hypers(self, comp, vals, durs):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)

        self._sample_time_noisy(comp, durs.squeeze())
        self._sample_time_ls(comp, durs.squeeze())

        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.time_hyper_samples.append((self.time_mean, self.time_noise, self.time_amp2,
                                        self.time_ls))

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov   = self.amp2 * (self.cov_func(ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-self.mean, solve)
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_time_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.time_max_ls):
                return -np.inf

            cov   = self.time_amp2 * (self.cov_func(ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + self.time_noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.time_mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-self.time_mean, solve)
            return lp

        self.time_ls = util.slice_sample(self.time_ls, logprob, compwise=True)

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale/noise)**2))
            #lp -= 0.5*(np.log(noise)/self.noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = hypers[2]

    def _sample_time_noisy(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.time_ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.time_noise_scale/noise)**2))
            #lp -= 0.5*(np.log(noise)/self.time_noise_scale)**2

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(np.sqrt(amp2))/self.time_amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.time_mean, self.time_amp2, self.time_noise]), logprob, compwise=False)
        self.time_mean  = hypers[0]
        self.time_amp2  = hypers[1]
        self.time_noise = hypers[2]

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean  = hypers[0]
            amp2  = hypers[1]
            noise = 1e-3

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0:
                return -np.inf

            cov   = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6*np.eye(comp.shape[0])) + noise*np.eye(comp.shape[0])
            chol  = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp    = -np.sum(np.log(np.diag(chol)))-0.5*np.dot(vals-mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5*(np.log(amp2)/self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2, self.noise]), logprob, compwise=False)
        self.mean  = hypers[0]
        self.amp2  = hypers[1]
        self.noise = 1e-3

    def optimize_hypers(self, comp, vals, durs):
        # First the GP to observations
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp,vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Now the GP to times
        timegp = gp.GP(self.cov_func.__name__)
        timegp.real_init(comp.shape[1], durs)
        timegp.optimize_hypers(comp, durs)
        self.time_mean  = timegp.mean
        self.time_amp2  = timegp.amp2
        self.time_noise = timegp.noise
        self.time_ls    = timegp.ls

        # Save hyperparameter samples
        self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        self.time_hyper_samples.append((self.time_mean, self.time_noise, self.time_amp2,
                                        self.time_ls))
        self.dump_hypers()
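
# Usage sketch (illustrative, not part of the original module). The driver
# loop and the variable names below (grid, values, durations, ...) are
# hypothetical stand-ins for spearmint's experiment runner:
#
#     chooser = GPEIperSecChooser(expt_dir)
#     ret = chooser.next(grid, values, durations,
#                        candidates, pending, complete)
#     # ret is either an int index into `candidates`, or a tuple
#     # (index, new_point) when an off-grid EI/sec optimum was found.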
Esempio n. 32
class GPEIChooser:
    def __init__(self,
                 expt_dir,
                 covar="Matern52",
                 mcmc_iters=10,
                 pending_samples=100,
                 noiseless=False):
        self.cov_func = getattr(gp, covar)
        self.locker = Locker()
        self.state_pkl = os.path.join(expt_dir, self.__module__ + ".pkl")

        self.mcmc_iters = int(mcmc_iters)
        self.pending_samples = int(pending_samples)
        self.D = -1
        self.hyper_iters = 1
        self.noiseless = bool(int(noiseless))

        self.noise_scale = 0.1  # horseshoe prior
        self.amp2_scale = 1  # zero-mean log normal prior
        self.max_ls = 2  # top-hat prior on length scales
        self.ls = None

    def __del__(self):
        self.locker.lock_wait(self.state_pkl)

        # Write the hyperparameters out to a Pickle.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump(
            {
                'dims': self.D,
                'ls': self.ls,
                'amp2': self.amp2,
                'noise': self.noise,
                'mean': self.mean
            }, fh)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s"' % (fh.name, self.state_pkl)
        os.system(cmd)  # TODO: Should check system-dependent return status.

        self.locker.unlock(self.state_pkl)

    def _real_init(self, dims, values):
        self.locker.lock_wait(self.state_pkl)

        if os.path.exists(self.state_pkl):
            fh = open(self.state_pkl, 'r')
            state = cPickle.load(fh)
            fh.close()

            self.D = state['dims']
            self.ls = state['ls']
            self.amp2 = state['amp2']
            self.noise = state['noise']
            self.mean = state['mean']
        else:

            # Input dimensionality.
            self.D = dims

            # Initial length scales.
            self.ls = np.ones(self.D)

            # Initial amplitude.
            self.amp2 = np.std(values) + 1e-4

            # Initial observation noise.
            self.noise = 1e-3

            # Initial mean.
            self.mean = np.mean(values)

        self.locker.unlock(self.state_pkl)

    def cov(self, x1, x2=None):
        if x2 is None:
            return self.amp2 * (self.cov_func(self.ls, x1, None) +
                                1e-6 * np.eye(x1.shape[0]))
        else:
            return self.amp2 * self.cov_func(self.ls, x1, x2)

    def next(self, grid, values, durations, candidates, pending, complete):

        # Don't bother using fancy GP stuff at first.
        if complete.shape[0] < 2:
            return int(candidates[0])

        # Perform the real initialization.
        if self.D == -1:
            self._real_init(grid.shape[1], values[complete])

        # Grab out the relevant sets.
        comp = grid[complete, :]
        cand = grid[candidates, :]
        pend = grid[pending, :]
        vals = values[complete]

        if self.mcmc_iters > 0:
            # Sample from hyperparameters.

            overall_ei = np.zeros((cand.shape[0], self.mcmc_iters))

            for mcmc_iter in range(self.mcmc_iters):

                self.sample_hypers(comp, vals)
                log("mean: %f  amp: %f  noise: %f  min_ls: %f  max_ls: %f" %
                    (self.mean, np.sqrt(self.amp2), self.noise, np.min(
                        self.ls), np.max(self.ls)))

                overall_ei[:, mcmc_iter] = self.compute_ei(
                    comp, pend, cand, vals)

            best_cand = np.argmax(np.mean(overall_ei, axis=1))

            return int(candidates[best_cand])

        else:
            # Optimize hyperparameters
            try:
                self.optimize_hypers(comp, vals)
            except Exception:
                # Initial length scales.
                self.ls = np.ones(self.D)
                # Initial amplitude.
                self.amp2 = np.std(vals)
                # Initial observation noise.
                self.noise = 1e-3
            log("mean: %f  amp: %f  noise: %f  min_ls: %f  max_ls: %f" %
                (self.mean, np.sqrt(self.amp2), self.noise, np.min(
                    self.ls), np.max(self.ls)))

            ei = self.compute_ei(comp, pend, cand, vals)
            best_cand = np.argmax(ei)

            return int(candidates[best_cand])

    def compute_ei(self, comp, pend, cand, vals):
        if pend.shape[0] == 0:
            # If there are no pending, don't do anything fancy.

            # Current best.
            best = np.min(vals)

            # The primary covariances for prediction.
            comp_cov = self.cov(comp)
            cand_cross = self.cov(comp, cand)

            # Compute the required Cholesky.
            obsv_cov = comp_cov + self.noise * np.eye(comp.shape[0])
            obsv_chol = spla.cholesky(obsv_cov, lower=True)

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v)
            u = (best - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return ei
        else:
            # If there are pending experiments, fantasize their outcomes.

            # Create a composite vector of complete and pending.
            comp_pend = np.concatenate((comp, pend))

            # Compute the covariance and Cholesky decomposition.
            comp_pend_cov = self.cov(comp_pend) + self.noise * np.eye(
                comp_pend.shape[0])
            comp_pend_chol = spla.cholesky(comp_pend_cov, lower=True)

            # Compute submatrices.
            pend_cross = self.cov(comp, pend)
            pend_kappa = self.cov(pend)

            # Use the sub-Cholesky.
            obsv_chol = comp_pend_chol[:comp.shape[0], :comp.shape[0]]

            # Solve the linear systems.
            alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
            beta = spla.cho_solve((obsv_chol, True), pend_cross)

            # Finding predictive means and variances.
            pend_m = np.dot(pend_cross.T, alpha) + self.mean
            pend_K = pend_kappa - np.dot(pend_cross.T, beta)

            # Take the Cholesky of the predictive covariance.
            pend_chol = spla.cholesky(pend_K, lower=True)

            # Make predictions.
            pend_fant = (np.dot(
                pend_chol, npr.randn(pend.shape[0], self.pending_samples)) +
                         pend_m[:, None])

            # Include the fantasies.
            fant_vals = np.concatenate(
                (np.tile(vals[:, np.newaxis],
                         (1, self.pending_samples)), pend_fant))

            # Compute bests over the fantasies.
            bests = np.min(fant_vals, axis=0)

            # Now generalize from these fantasies.
            cand_cross = self.cov(comp_pend, cand)

            # Solve the linear systems.
            alpha = spla.cho_solve((comp_pend_chol, True),
                                   fant_vals - self.mean)
            beta = spla.solve_triangular(comp_pend_chol,
                                         cand_cross,
                                         lower=True)

            # Predict the marginal means and variances at candidates.
            func_m = np.dot(cand_cross.T, alpha) + self.mean
            func_v = self.amp2 * (1 + 1e-6) - np.sum(beta**2, axis=0)

            # Expected improvement
            func_s = np.sqrt(func_v[:, np.newaxis])
            u = (bests[np.newaxis, :] - func_m) / func_s
            ncdf = sps.norm.cdf(u)
            npdf = sps.norm.pdf(u)
            ei = func_s * (u * ncdf + npdf)

            return np.mean(ei, axis=1)

    def sample_hypers(self, comp, vals):
        if self.noiseless:
            self.noise = 1e-3
            self._sample_noiseless(comp, vals)
        else:
            self._sample_noisy(comp, vals)
        self._sample_ls(comp, vals)

    def _sample_ls(self, comp, vals):
        def logprob(ls):
            if np.any(ls < 0) or np.any(ls > self.max_ls):
                return -np.inf

            cov = self.amp2 * (self.cov_func(ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + self.noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - self.mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - self.mean, solve)
            return lp

        self.ls = util.slice_sample(self.ls, logprob, compwise=True)

    def _sample_noisy(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = hypers[2]

            # This is pretty hacky, but keeps things sane.
            if mean > np.max(vals) or mean < np.min(vals):
                return -np.inf

            if amp2 < 0 or noise < 0:
                return -np.inf

            cov = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in noise horseshoe prior.
            lp += np.log(np.log(1 + (self.noise_scale / noise)**2))

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = hypers[2]

    def _sample_noiseless(self, comp, vals):
        def logprob(hypers):
            mean = hypers[0]
            amp2 = hypers[1]
            noise = 1e-3

            if amp2 < 0:
                return -np.inf

            cov = amp2 * (self.cov_func(self.ls, comp, None) + 1e-6 * np.eye(
                comp.shape[0])) + noise * np.eye(comp.shape[0])
            chol = spla.cholesky(cov, lower=True)
            solve = spla.cho_solve((chol, True), vals - mean)
            lp = -np.sum(np.log(np.diag(chol))) - 0.5 * np.dot(
                vals - mean, solve)

            # Roll in amplitude lognormal prior
            lp -= 0.5 * (np.log(amp2) / self.amp2_scale)**2

            return lp

        hypers = util.slice_sample(np.array([self.mean, self.amp2,
                                             self.noise]),
                                   logprob,
                                   compwise=False)
        self.mean = hypers[0]
        self.amp2 = hypers[1]
        self.noise = 1e-3

    def optimize_hypers(self, comp, vals):
        mygp = gp.GP(self.cov_func.__name__)
        mygp.real_init(comp.shape[1], vals)
        mygp.optimize_hypers(comp, vals)
        self.mean = mygp.mean
        self.ls = mygp.ls
        self.amp2 = mygp.amp2
        self.noise = mygp.noise

        # Save hyperparameter samples
        #self.hyper_samples.append((self.mean, self.noise, self.amp2, self.ls))
        #self.dump_hypers()

        return
Esempio n. 33
class ExperimentGrid:

    @staticmethod
    def job_running(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_running(id)

    @staticmethod
    def job_complete(expt_dir, id, value, duration):
        log("setting job %d complete" % id)
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_complete(id, value, duration)
        log("set...")

    @staticmethod
    def job_broken(expt_dir, id):
        expt_grid = ExperimentGrid(expt_dir)
        expt_grid.set_broken(id)

    def __init__(self, expt_dir, variables=None, grid_size=None, grid_seed=1):
        self._ready = False
        self.expt_dir = expt_dir
        self.jobs_pkl = os.path.join(expt_dir, EXPERIMENT_GRID_FILE)
        self.locker   = Locker()

        # Only one process at a time is allowed to have access to the grid.
        self.locker.lock_wait(self.jobs_pkl)

        # Set up the grid for the first time if it doesn't exist.
        if variables is not None and not os.path.exists(self.jobs_pkl):
            self.seed     = grid_seed
            self.vmap     = GridMap(variables, grid_size)
            self.grid     = self._hypercube_grid(self.vmap.card(), grid_size)
            self.status   = np.zeros(grid_size, dtype=int) + CANDIDATE_STATE
            self.values   = np.zeros(grid_size) + np.nan
            self.durs     = np.zeros(grid_size) + np.nan
            self.proc_ids = np.zeros(grid_size, dtype=int)
            self._ready = True
            self._save_jobs()
        # Or load in the grid from the pickled file.
        else:
            self._load_jobs()


    def __del__(self):
        self._save_jobs()
        if not self.locker.unlock(self.jobs_pkl):
            raise Exception("Could not release lock on job grid.\n")

    def get_grid(self):
        return self.grid, self.values, self.durs

    def get_candidates(self):
        return np.nonzero(self.status == CANDIDATE_STATE)[0]

    def get_pending(self):
        return np.nonzero((self.status == SUBMITTED_STATE) | (self.status == RUNNING_STATE))[0]

    def get_complete(self):
        return np.nonzero(self.status == COMPLETE_STATE)[0]

    def get_broken(self):
        return np.nonzero(self.status == BROKEN_STATE)[0]

    def get_params(self, index):
        return self.vmap.get_params(self.grid[index,:])

    def get_best(self):
        finite = self.values[np.isfinite(self.values)]
        if len(finite) > 0:
            cur_min = np.min(finite)
            index   = np.nonzero(self.values==cur_min)[0][0]
            return cur_min, index
        else:
            return np.nan, -1

    def get_proc_id(self, id):
        return self.proc_ids[id]

    def add_to_grid(self, candidate):
        # Checks to prevent numerical over/underflow from corrupting the grid
        candidate[candidate > 1.0] = 1.0
        candidate[candidate < 0.0] = 0.0

        # Set up the grid
        self.grid   = np.vstack((self.grid, candidate))
        self.status = np.append(self.status, np.zeros(1, dtype=int) +
                                int(CANDIDATE_STATE))

        self.values = np.append(self.values, np.zeros(1)+np.nan)
        self.durs   = np.append(self.durs, np.zeros(1)+np.nan)
        self.proc_ids = np.append(self.proc_ids, np.zeros(1,dtype=int))

        # Save this out.
        self._save_jobs()
        return self.grid.shape[0]-1

    def set_candidate(self, id):
        self.status[id] = CANDIDATE_STATE
        self._save_jobs()

    def set_submitted(self, id, proc_id):
        self.status[id] = SUBMITTED_STATE
        self.proc_ids[id] = proc_id
        self._save_jobs()

    def set_running(self, id):
        self.status[id] = RUNNING_STATE
        self._save_jobs()

    def set_complete(self, id, value, duration):
        self.status[id] = COMPLETE_STATE
        self.values[id] = value
        self.durs[id]   = duration
        self._save_jobs()

    def set_broken(self, id):
        self.status[id] = BROKEN_STATE
        self._save_jobs()

    def _load_jobs(self):
        fh   = open(self.jobs_pkl, 'r')
        jobs = cPickle.load(fh)
        fh.close()

        self.vmap   = jobs['vmap']
        self.grid   = jobs['grid']
        self.status = jobs['status']
        self.values = jobs['values']
        self.durs   = jobs['durs']
        self.proc_ids = jobs['proc_ids']
        self._ready = True

    def _save_jobs(self):

        if not self._ready:
            return
        # Write everything to a temporary file first.
        fh = tempfile.NamedTemporaryFile(mode='w', delete=False)
        cPickle.dump({ 'vmap'   : self.vmap,
                       'grid'   : self.grid,
                       'status' : self.status,
                       'values' : self.values,
                       'durs'   : self.durs,
                       'proc_ids' : self.proc_ids }, fh, protocol=-1)
        fh.close()

        # Use an atomic move for better NFS happiness.
        cmd = 'mv "%s" "%s.new"' % (fh.name, self.jobs_pkl)
        assert os.system(cmd) == 0 # TODO: Should check system-dependent return status.

        if os.path.exists(self.jobs_pkl):
            assert os.system('mv "%s" "%s.old"' % (self.jobs_pkl, self.jobs_pkl)) == 0

        assert os.system('mv "%s.new" "%s"' % (self.jobs_pkl, self.jobs_pkl)) == 0

    def _hypercube_grid(self, dims, size):
        # Generate from a sobol sequence
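        # i4_sobol_generate yields a (dims x size) array of quasi-random
        # points in the unit hypercube; transpose so each row is one point.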
        sobol_grid = np.transpose(i4_sobol_generate(dims,size,self.seed))

        return sobol_grid