예제 #1
0
def run_kg(run_index):
    '''
    Run a knowledge-gradient (KG) optimization over a single information
    source that is backed by a pickled enthalpy table.

    The function blocks until the historical sample file
    "data_dumps/<run_index>_reduced.history" exists, then configures an
    Optimizer (file names, search space, mean/covariance functions and
    hyperparameter bounds) and finally calls sim.run().

    **Parameters**

        run_index: *int*
            Used only to build the names of the files under data_dumps/.

    **Returns**

        None
    '''
    import sys  # Local import; only needed to flush the progress message.

    # Load the enthalpy table that backs the information source.  The context
    # manager closes the file handle as soon as the table is in memory.
    with open("enthalpy_N1_R3_Ukcal-mol", 'r') as fptr:
        IS0 = pickle.load(fptr)
    #IS0 = pickle.load(open("enthalpy_N3_R2_Ukcal-mol", 'r'))

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    #sim.hyperparameter_objective = MAP
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_kg.dat"
    sim.fname_historical = "data_dumps/%d_reduced.history" % run_index

    # Wait for the historical sample file to show up.  The message is written
    # and flushed explicitly so that it is visible while we are waiting; the
    # emitted text is the same as the former two print statements produced.
    sys.stdout.write("Waiting on %s to be written... " % sim.fname_historical)
    sys.stdout.flush()
    while not os.path.exists(sim.fname_historical):
        time.sleep(30)
    sys.stdout.write(" DONE\n")
    sys.stdout.flush()

    # Information sources, in order from expensive to cheap.  The stored
    # value is negated before it is handed to the optimizer.
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
    ]
    sim.costs = [1.0]
    sim.save_extra_files = True

    # Start from an empty log file
    sim.logger_fname = "data_dumps/%d_kg.log" % run_index
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = "data_dumps/%d_kg.dat" % run_index
    sim.mu_fname = "data_dumps/%d_mu_kg.dat" % run_index
    sim.sig_fname = "data_dumps/%d_sig_kg.dat" % run_index
    sim.sample_fname = "data_dumps/%d_sample_kg.dat" % run_index
    sim.combos_fname = "data_dumps/%d_combos_kg.dat" % run_index
    sim.hp_fname = "data_dumps/%d_hp_kg.dat" % run_index
    sim.acquisition_fname = "data_dumps/%d_acq_kg.dat" % run_index
    sim.historical_nsample = 10
    ########################################

    sim.n_start = 20  # The number of starting MLE samples
    sim.reopt = 10
    sim.ramp_opt = None
    sim.parallel = False

    sim.acquisition = getNextSample_kg

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    # Functional forms of our mean and covariance
    # MEAN: SCALE * mu_alpha + mu_zeta  (SCALE is 4 with mixed halides, else 2)
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)

    SCALE = [2.0, 4.0][int(sim.mixed_halides)]
    # The first argument of the mean function is unused (dummy entry)
    sim.mean = lambda _1, Y, theta: np.array(
        [SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])

    def cov(X, Y, theta):
        # Convert once and reuse.
        # NOTE(review): columns 1:-3 look like the composition descriptors and
        # columns -3:-1 like the solvent descriptors - confirm against the
        # Optimizer's data layout.
        X_arr = np.array(X)
        composition = X_arr[:, 1:-3]
        A = theta.sig_alpha * np.dot(composition, composition.T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(X_arr[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)

        return A + B + C + D

    sim.cov = cov

    # Hyperparameters start unset (None); each bound is (lower, upper), where
    # the upper limit may be a callable of (X, Y) evaluated on the data.
    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    # NOTE! rho is a reserved keyword in misoKG.  With a single information
    # source it only holds the "[0, 0]" entry, pinned to 1 by its bounds.
    # sim.theta.rho, sim.theta.bounds['rho'] = {"[0, 0]": 1}, (1E-1, 5.0)
    sim.theta.rho = {"[0, 0]": 1}
    sim.theta.bounds['rho [0, 0]'] = (1, 1)

    sim.theta.set_hp_names()

    sim.primary_rho_opt = False

    ###################################################################################################

    # Start simulation
    sim.run()
예제 #2
0
from pal.constants.solvents import solvents
from pal.kernels.matern import maternKernel52 as mk52
# from pal.objectives.binding_energy import get_binding_energy as BE
from pal.acquisition.misokg import getNextSample_misokg

import copy
# import random
import numpy as np
import cPickle as pickle

# Load the pickled enthalpy tables that back the two information sources.
# Context managers close the file handles as soon as the tables are loaded.
with open("enthalpy_N3_R2_Ukcal-mol", 'r') as fptr:
    IS0 = pickle.load(fptr)
with open("enthalpy_N1_R2_Ukcal-mol", 'r') as fptr:
    IS1 = pickle.load(fptr)

# Generate the main object
sim = Optimizer()

# Assign simulation properties
###################################################################################################
# File names
sim.fname_out = "enthalpy.dat"
sim.fname_historical = None

# Information sources, in order from expensive to cheap.  Each one looks up
# the table entry keyed by "<joined h> <c> <s>".
sim.IS = [
    lambda h, c, s: IS0[' '.join([''.join(h), c, s])],
    lambda h, c, s: IS1[' '.join([''.join(h), c, s])]
]
sim.costs = [
    2.0,
    1.0
예제 #3
0
def run_misokg(run_index):
    '''
    Run a two-information-source misoKG optimization on pickled enthalpy
    tables.

    Before the optimizer starts, a random historical sample is drawn from the
    combinations available in the primary table, evaluated on every
    information source and written to "data_dumps/<run_index>.history".  The
    subset that belongs to information source 0 is additionally written to
    "data_dumps/<run_index>_reduced.history".

    **Parameters**

        run_index: *int*
            Used only to build the names of the files under data_dumps/.

    **Returns**

        None
    '''

    def dump_atomically(obj, fname):
        # Other runners poll for the history files by existence only, so the
        # final name must not become visible before the pickle is complete:
        # write to a temporary name, close it, then rename it into place.
        # NOTE: os.rename replaces an existing target on POSIX only.
        tmp_fname = fname + ".tmp"
        with open(tmp_fname, 'w') as fptr:
            pickle.dump(obj, fptr)
        os.rename(tmp_fname, fname)

    # Load the enthalpy tables that back the information sources; the context
    # managers close the file handles right away.
    with open("enthalpy_N1_R3_Ukcal-mol", 'r') as fptr:
        IS0 = pickle.load(fptr)
    with open("enthalpy_N1_R2_Ukcal-mol", 'r') as fptr:
        IS1 = pickle.load(fptr)

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    #sim.hyperparameter_objective = MAP
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_misokg.dat"
    sim.fname_historical = None

    # Information sources, in order from expensive to cheap.  The stored
    # values are negated before they are handed to the optimizer.
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
        lambda h, c, s: -1.0 * IS1[' '.join([''.join(h), c, s])]
    ]
    sim.costs = [1.0, 0.1]

    # Start from an empty log file
    sim.logger_fname = "data_dumps/%d_misokg.log" % run_index
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = "data_dumps/%d_misokg.dat" % run_index
    sim.mu_fname = "data_dumps/%d_mu_misokg.dat" % run_index
    sim.sig_fname = "data_dumps/%d_sig_misokg.dat" % run_index
    sim.combos_fname = "data_dumps/%d_combos_misokg.dat" % run_index
    sim.hp_fname = "data_dumps/%d_hp_misokg.dat" % run_index
    sim.acquisition_fname = "data_dumps/%d_acq_misokg.dat" % run_index
    sim.save_extra_files = True
    ########################################
    # Override the possible combinations with the reduced list of IS0
    # Because we do this, we should also generate our own historical sample
    combos_no_IS = [
        k[1] + "Pb" + k[0] + "_" + k[2]
        for k in [key.split() for key in IS0.keys()]
    ]
    sim.historical_nsample = 10
    choices = np.random.choice(combos_no_IS,
                               sim.historical_nsample,
                               replace=False)
    tmp_data = pal_strings.alphaToNum(choices,
                                      solvents,
                                      mixed_halides=True,
                                      name_has_IS=False)

    # Evaluate every sampled point on every information source.  Each row is
    # [IS index] + numeric descriptor + [objective value].
    data = []
    for IS_index, IS_func in enumerate(sim.IS):
        for d in tmp_data:
            name = pal_strings.parseNum(d,
                                        solvents,
                                        mixed_halides=True,
                                        num_has_IS=False)
            h, c, _, s, _ = pal_strings.parseName(name, name_has_IS=False)
            c = c[0]
            data.append([IS_index] + d + [IS_func(h, c, s)])

    sim.fname_historical = "data_dumps/%d.history" % run_index
    dump_atomically(data, sim.fname_historical)
    # The reduced history only holds the rows of information source 0
    simple_data = [d for d in data if d[0] == 0]
    dump_atomically(simple_data,
                    "data_dumps/%d_reduced.history" % run_index)

    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 20
    sim.ramp_opt = None
    sim.parallel = False

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # MEAN: SCALE * mu_alpha + mu_zeta  (SCALE is 4 with mixed halides, else 2)
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)

    SCALE = [2.0, 4.0][int(sim.mixed_halides)]

    def mean(X, Y, theta):
        # Constant mean; X is unused and Y only fixes the output length
        mu = np.array([SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])
        return mu

    sim.mean = mean

    # Earlier covariance that scales by theta.rho_matrix(X).  It is kept for
    # reference only and is not assigned to sim.cov.
    def cov_old(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3],
            np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X) * (A + B + C + D)

    def cov(X0, Y, theta):
        # Covariance over the inputs (Kx); the array is converted only once
        X_arr = np.array(X0)
        composition = X_arr[:, :-3]
        A = theta.sig_alpha * np.dot(composition, composition.T)
        B = theta.sig_beta * np.diag(np.ones(len(X0)))
        C = theta.sig_zeta
        D = mk52(X_arr[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        Kx = A + B + C + D

        # Symmetric matrix over the information sources (Ks), read from rho
        Ks = np.array([
            np.array(
                [theta.rho[str(sorted([i, j]))] for j in range(theta.n_IS)])
            for i in range(theta.n_IS)
        ])
        if theta.normalize_Ks:
            Ks = Ks / np.linalg.norm(Ks)

        # Scale Ks on both sides by diag(e1, e2); this fixes two sources
        e = np.diag(np.array([theta.e1, theta.e2]))
        Ks = e.dot(Ks.dot(e))

        return np.kron(Ks, Kx)

    sim.cov = cov

    # Hyperparameters start unset (None); each bound is (lower, upper), where
    # the upper limit may be a callable of (X, Y) evaluated on the data.
    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    sim.theta.e1, sim.theta.bounds['e1'] = None, (1E-1, 1.0)
    sim.theta.e2, sim.theta.bounds['e2'] = None, (1E-1, 1.0)

    # NOTE! rho is a reserved keyword in misoKG.  It holds one entry per
    # (unordered) pair of information sources.
    sim.theta.rho = {"[0, 0]": 1.0, "[0, 1]": 0.96, "[1, 1]": 1.0}
    sim.theta.bounds['rho [0, 0]'] = (0.1, 1.0)
    sim.theta.bounds['rho [0, 1]'] = (0.1, 1.0)
    sim.theta.bounds['rho [1, 1]'] = (0.1, 1.0)

    sim.theta.set_hp_names()

    sim.primary_rho_opt = False
    sim.update_hp_only_with_IS0 = False
    sim.update_hp_only_with_overlapped = False

    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False

    # This was a test feature that actually over-wrote rho to be PSD
    # sim.force_rho_psd = True
    sim.recommendation_kill_switch = "FAPbBrBrCl_THTO_0"

    ###################################################################################################

    # Start simulation
    sim.run()
예제 #4
0
def run(run_index,
        model,
        folder="data_dumps",
        hp_opt="IS0",
        sample_domain=1000):
    '''
    This function will run a two-information-source optimization of the
    (negated, shifted) Rosenbrock function using one of several
    coregionalization methods.

        1. Pearson R Intrinsic Coregionalization Method (PRICM).  This approach
           will dynamically calculate the Pearson R value for the off-diagonals
           in the ICM.  Diagonals are kept as 1.0.
        2. Intrinsic Coregionalization Method (ICM).  This approach will use a
           lower triangular matrix (L) of hyperparameters to generate the
           coregionalization matrix B = LL^T.
        3. MISO.  This approach uses the block covariance
           [[Kx, Kx], [Kx, Kx + Kx_l]] built from two squared kernels.

    Further, we can parameterize the hyperparameters in many ways:

        1. IS0 - Only parameterize hyperparameters using values sampled at IS0.
        2. Full - Parameterize hyperparameters using all sampled data.
        3. Overlap - Parameterize hyperparameters using data that overlaps all IS.

    **Parameters**

        run_index: *int*
            This is simply used for a naming convention.
        model: *str*
            The model to be used (PRICM, ICM or MISO).  Case insensitive.
        folder: *str, optional*
            What to name the folder where the data will go.
        hp_opt: *str, optional*
            With what data should the hyperparameters be parameterized.
            Options: IS0, full, overlap.  Case insensitive.
        sample_domain: *int, optional*
            How many data points to sample from the domain.

    **Raises**

        AssertionError if hp_opt is not one of the allowed options.
        Exception if model is not one of MISO, ICM or PRICM.
    '''

    hp_opt = hp_opt.lower()
    allowed_hp_opt = ["is0", "full", "overlap"]
    assert hp_opt in allowed_hp_opt, "Error, hp_opt (%s) not in %s" % (
        hp_opt, ", ".join(allowed_hp_opt))

    # Normalize the model name once so that every check below is
    # case-insensitive, and reject bad names before any file is touched.
    model_key = model.lower()
    if model_key not in ("miso", "icm", "pricm"):
        raise Exception("Invalid model.  Use MISO, ICM, or PRICM")

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = None
    sim.fname_historical = None

    # Start from an empty log file (the name keeps the model as it was given)
    sim.logger_fname = "%s/%d_%s_%s.log" % (folder, run_index, model, hp_opt)
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)
    sim.obj_vs_cost_fname = None
    sim.mu_fname = None
    sim.sig_fname = None
    sim.combos_fname = None
    sim.hp_fname = None
    sim.acquisition_fname = None
    sim.save_extra_files = True

    # Information sources, in order from expensive to cheap.  The cheap one
    # adds a sinusoidal perturbation to the Rosenbrock function.
    rosenbrock = lambda x1, x2: (1.0 - x1)**2 + 100.0 * (x2 - x1**2)**2 - 456.3
    sim.IS = [
        lambda x1, x2: -1.0 * rosenbrock(x1, x2), lambda x1, x2: -1.0 *
        (rosenbrock(x1, x2) + 10.0 * np.sin(10.0 * x1 + 5.0 * x2))
    ]
    sim.costs = [1000.0, 1.0]

    # This overrides the value assigned above; no extra files are saved
    sim.save_extra_files = False
    ########################################
    sim.numerical = True
    sim.historical_nsample = 5
    sim.domain = [(-2.0, 2.0), (-2.0, 2.0)]
    sim.sample_n_from_domain = sample_domain
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    # sim.reopt = 20
    sim.reopt = float('inf')  # Never re-opt hyperparams
    sim.ramp_opt = None
    sim.parallel = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    sim.mean = lambda X, Y, theta: np.array([-456.3 for _ in Y])

    def cov_miso(X0, Y, theta, split=False):
        # The split flag is accepted for interface parity but not used here
        Kx = squared(np.array(X0), [theta.l1], theta.sig_1)
        Kx_l = squared(np.array(X0), [theta.l2], theta.sig_2)
        return np.block([[Kx, Kx], [Kx, Kx + Kx_l]])

    def cov_pricm(X0, Y, theta, split=False):
        Kx = squared(np.array(X0), [theta.l1], theta.sig_1)
        # Small jitter on the diagonal
        Kx = Kx + 1E-6 * np.eye(Kx.shape[0])

        if model_key == "pricm":
            # Symmetric matrix read directly from rho
            Ks = np.array([
                np.array([
                    theta.rho[str(sorted([i, j]))] for j in range(theta.n_IS)
                ]) for i in range(theta.n_IS)
            ])
        elif model_key == "icm":
            # rho fills the lower triangle of L
            L = np.array([
                np.array([
                    theta.rho[str(sorted([i, j]))] if i >= j else 0.0
                    for j in range(theta.n_IS)
                ]) for i in range(theta.n_IS)
            ])
            # Force it to be positive semi-definite
            Ks = L.dot(L.T)

        if split:
            return Ks, Kx
        else:
            return np.kron(Ks, Kx)

    # Hyperparameters start unset (None); each bound is (lower, upper), where
    # the upper limit may be a callable of (X, Y) evaluated on the data.
    sim.theta.bounds = {}
    sim.theta.sig_1, sim.theta.bounds['sig_1'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)

    # One rho entry per unordered pair of information sources, i <= j
    n_IS = len(sim.IS)
    rho_pairs = [(i, j) for i in range(n_IS) for j in range(i, n_IS)]

    if model_key == "miso":
        sim.cov = cov_miso
        sim.theta.sig_2, sim.theta.bounds['sig_2'] = None, (
            1E-2, lambda _, Y: np.var(Y))
        sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)
        rho_init = 1.0
    elif model_key == "icm":
        sim.cov = cov_pricm
        rho_init = None
    else:  # pricm
        sim.cov = cov_pricm
        rho_init = 1.0
        sim.dynamic_pc = True
    sim.theta.rho = {str([i, j]): rho_init for i, j in rho_pairs}

    # Diagonal and off-diagonal rho entries get different bounds
    for i, j in rho_pairs:
        if i == j:
            sim.theta.bounds['rho %s' % str([i, j])] = (0.1, 1.0)
        else:
            sim.theta.bounds['rho %s' % str([i, j])] = (0.01, 1.0 - 1E-6)

    sim.theta.set_hp_names()

    # Define how we update hyperparameters
    if hp_opt == "is0":
        sim.update_hp_only_with_IS0 = True
        sim.update_hp_only_with_overlapped = False
    elif hp_opt == "overlap":
        sim.update_hp_only_with_IS0 = False
        sim.update_hp_only_with_overlapped = True
    elif hp_opt == "full":
        sim.update_hp_only_with_IS0 = False
        sim.update_hp_only_with_overlapped = False
    else:
        # Still reachable when assertions are disabled (python -O)
        raise Exception("Unknown hp_opt (%s)." % hp_opt)

    # These should be False by default, but ensure they are
    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False
    sim.preconditioned = False

    # Assign our likelihood function.
    sim.loglike = g_loglike
    ###################################################################################################

    # Start simulation
    sim.iteration_kill_switch = None
    sim.cost_kill_switch = 100000
    sim.run()
예제 #5
0
def run(run_index, folder="data_dumps", infosources=0, exact_cost=True):
    '''
    Run a single-information-source optimization on one of the pickled
    enthalpy tables, seeded from history files found in the given folder.

    The function blocks until both "<folder>/<run_index>_reduced.history" and
    "<folder>/<run_index>.combos" exist.  The run is given a recommendation
    kill switch named after the table entry with the smallest stored value.

    **Parameters**

        run_index: *int*
            This is simply used for a naming convention.
        folder: *str, optional*
            The folder holding the history files and receiving the log.
        infosources: *int, optional*
            Which enthalpy table to use: 0 (N1_R3), 1 (N3_R2), 2 or 3
            (both N5_R2_wo_GBL).
        exact_cost: *bool, optional*
            Selects between the two cost values stored for each table.

    **Raises**

        Exception if infosources is not one of 0, 1, 2 or 3.
    '''
    import sys  # Local import; only needed to flush the progress message.

    # infosources -> (table file, cost if exact_cost, cost otherwise).
    # NOTE(review): entries 2 and 3 are identical - confirm that is intended.
    source_table = {
        0: ("enthalpy_N1_R3_Ukcal-mol", 6.0, 10.0),
        1: ("enthalpy_N3_R2_Ukcal-mol", 14.0, 10.0),
        2: ("enthalpy_N5_R2_wo_GBL_Ukcal-mol", 27.0, 100.0),
        3: ("enthalpy_N5_R2_wo_GBL_Ukcal-mol", 27.0, 100.0),
    }
    if infosources not in source_table:
        raise Exception("HOW?")
    table_fname, cost_exact, cost_approx = source_table[infosources]
    costs = [cost_exact] if exact_cost else [cost_approx]

    # Only the selected table is loaded, and its file handle is closed as
    # soon as the data is in memory.
    with open(table_fname, 'r') as fptr:
        IS0 = pickle.load(fptr)

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_ei.dat"
    sim.fname_historical = "%s/%d_reduced.history" % (folder, run_index)
    combos_fname = "%s/%d.combos" % (folder, run_index)

    # Wait for both input files to show up.  The message is written and
    # flushed explicitly so that it is visible while we are waiting; the
    # emitted text is the same as the former two print statements produced.
    sys.stdout.write("Waiting on %s to be written... " % sim.fname_historical)
    sys.stdout.flush()
    while not all([
            os.path.exists(sim.fname_historical),
            os.path.exists(combos_fname)
    ]):
        time.sleep(30)
    sys.stdout.write(" DONE\n")
    sys.stdout.flush()

    # Information sources, in order from expensive to cheap.  The stored
    # value is negated before it is handed to the optimizer.
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
    ]
    sim.costs = costs
    sim.save_extra_files = False

    # Start from an empty log file
    sim.logger_fname = "%s/%d_ei.log" % (folder, run_index)
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    # The historical sample size is the number of entries in the history file
    with open(sim.fname_historical, 'r') as fptr:
        sim.historical_nsample = len(pickle.load(fptr))
    # Keep only the combinations whose name ends with "0"
    with open(combos_fname, 'r') as fptr:
        sim.combinations = [
            c for c in pickle.load(fptr) if c.endswith("0")
        ]
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    # sim.reopt = 20
    sim.reopt = float("inf")  # Don't reopt hyperparams
    sim.ramp_opt = None
    sim.parallel = False

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    # Functional forms of our mean and covariance
    # MEAN: SCALE * mu_alpha + mu_zeta  (SCALE is 4 with mixed halides, else 2)
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)

    SCALE = [2.0, 4.0][int(sim.mixed_halides)]
    # The first argument of the mean function is unused (dummy entry)
    sim.mean = lambda _1, Y, theta: np.array(
        [SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])

    def cov(X, Y, theta):
        # Convert once and reuse
        X_arr = np.array(X)
        composition = X_arr[:, 1:-3]
        A = theta.sig_alpha * np.dot(composition, composition.T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(X_arr[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)

        return A + B + C + D

    sim.cov = cov

    # Hyperparameters start unset (None); each bound is (lower, upper), where
    # the upper limit may be a callable of (X, Y) evaluated on the data.
    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    # NOTE! rho is a reserved keyword in misoKG.  With a single information
    # source it only holds the "[0, 0]" entry, pinned to 1 by its bounds.
    # sim.theta.rho, sim.theta.bounds['rho'] = {"[0, 0]": 1}, (1E-1, 5.0)
    sim.theta.rho = {"[0, 0]": 1}
    sim.theta.bounds['rho [0, 0]'] = (1, 1)

    sim.theta.set_hp_names()

    # Stop once the entry with the smallest stored value is recommended.
    # Ties on the value are broken by the key, as a (value, key) minimum.
    best_key = min(IS0, key=lambda k: (IS0[k], k))
    h, c, s = best_key.split()
    sim.recommendation_kill_switch = "%sPb%s_%s_0" % (c, h, s)
    sim.primary_rho_opt = False
    sim.update_hp_only_with_IS0 = False

    ###################################################################################################

    # Start simulation
    sim.run()
예제 #6
0
def run(run_index, folder="data_dumps", sample_domain=1000):
    '''
    This function will run an expected-improvement (EI) optimization over a
    single, one-dimensional information source that is read from the pickled
    table "IS0.pickle".

    Every table entry is indexed as entry[0] for the objective value and
    entry[1] for a cost.  The objective is negated before it is handed to the
    optimizer, and the cost of the information source is the mean of all
    entry[1] values.

    **Parameters**

        run_index: *int*
            This is simply used for a naming convention.
        folder: *str, optional*
            What to name the folder where the data will go.
        sample_domain: *int, optional*
            How many data points to sample from the domain.

    **Returns**

        None
    '''

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    sim.hyperparameter_objective = MLE
    sim.acquisition = getNextSample_EI
    ###################################################################################################
    # File names
    sim.fname_out = None
    sim.fname_historical = None

    # Start from an empty log file
    sim.logger_fname = "%s/%d_%s.log" % (folder, run_index, "ei")
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)
    sim.obj_vs_cost_fname = None
    sim.mu_fname = None
    sim.sig_fname = None
    sim.combos_fname = None
    sim.hp_fname = None
    sim.acquisition_fname = None
    sim.save_extra_files = True

    # Information sources, in order from expensive to cheap.  The context
    # manager closes the file handle as soon as the table is loaded.
    with open("IS0.pickle", 'r') as fptr:
        IS0 = pickle.load(fptr)

    # The table is indexed by int((x1 - 0.5) * 1000.0), which truncates.
    # NOTE(review): this presumably assumes a 0.001 grid starting at 0.5;
    # floating point error can push a grid point to the index below - confirm.
    sim.IS = [
        lambda x1: -1.0 * IS0[int((x1 - 0.5) * 1000.0)][0],
    ]
    sim.costs = [
        np.mean([entry[1] for entry in IS0]),
    ]

    # This overrides the value assigned above; no extra files are saved
    sim.save_extra_files = False
    ########################################
    sim.numerical = True
    sim.historical_nsample = 5
    sim.domain = [(0.5, 2.5)]
    sim.sample_n_from_domain = sample_domain
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    # sim.reopt = 20
    sim.reopt = float("inf")  # Never re-opt hyperparams
    sim.ramp_opt = None
    sim.parallel = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    # Functional forms of our mean and covariance
    sim.mean = lambda X, Y, theta: np.array([0.0 for _ in Y])

    def cov(X0, Y, theta):
        return squared(np.array(X0), [theta.l1], theta.sig_1)

    sim.cov = cov

    # Hyperparameters start unset (None); each bound is (lower, upper), where
    # the upper limit may be a callable of (X, Y) evaluated on the data.
    sim.theta.bounds = {}
    sim.theta.sig_1, sim.theta.bounds['sig_1'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)

    # One rho entry per unordered pair of information sources, i <= j.  With
    # the single source defined above this is just "[0, 0]".
    n_IS = len(sim.IS)
    rho_pairs = [(i, j) for i in range(n_IS) for j in range(i, n_IS)]
    sim.theta.rho = {str([i, j]): 1.0 for i, j in rho_pairs}
    for i, j in rho_pairs:
        if i == j:
            sim.theta.bounds['rho %s' % str([i, j])] = (0.1, 1.0)
        else:
            sim.theta.bounds['rho %s' % str([i, j])] = (0.01, 1.0 - 1E-6)

    sim.theta.set_hp_names()

    # Define how we update hyperparameters
    sim.update_hp_only_with_IS0 = False
    sim.update_hp_only_with_overlapped = False

    # These should be False by default, but ensure they are
    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False
    sim.preconditioned = False

    # Assign our likelihood function.
    sim.loglike = g_loglike
    ###################################################################################################

    # Start simulation
    sim.iteration_kill_switch = None
    sim.cost_kill_switch = 3000
    sim.run()
예제 #7
0
파일: run_ei.py 프로젝트: ClancyLab/PAL
def run_ei(run_index, SAMPLE_DOMAIN=1000):
    '''
    Configure an Optimizer for the negated, shifted 2D Rosenbrock function
    as its only information source and run it.

    **Parameters**

        run_index: *int*
            This is simply used for a naming convention.
        SAMPLE_DOMAIN: *int, optional*
            How many data points to sample from the domain.  It also names
            the output folder ("RNS<SAMPLE_DOMAIN>").

    **Returns**

        None
    '''

    out_dir = "RNS%d" % SAMPLE_DOMAIN
    suffix = "ei"

    def shifted_rosenbrock(x1, x2):
        # Rosenbrock function, shifted down by 456.3
        return (1.0 - x1)**2 + 100.0 * (x2 - x1**2)**2 - 456.3

    def constant_mean(X, Y, theta):
        # Constant prior mean with one entry per observation
        return np.array([-456.3] * len(Y))

    def cov(X0, Y, theta):
        # The first column of X0 is dropped before the kernel is evaluated
        features = np.array(X0)[:, 1:]
        return squared(features, [theta.l1, theta.l2], theta.sig_1)

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    #if use_MAP:
    #    sim.hyperparameter_objective = MAP
    #else:
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = None
    sim.fname_historical = None

    # Start from an empty log file
    log_path = "%s/%d_%s.log" % (out_dir, run_index, suffix)
    sim.logger_fname = log_path
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    # None of the optional output files are named
    for attr in ("obj_vs_cost_fname", "mu_fname", "sig_fname",
                 "combos_fname", "hp_fname", "acquisition_fname"):
        setattr(sim, attr, None)
    sim.save_extra_files = True

    # Information sources, in order from expensive to cheap
    sim.IS = [lambda x1, x2: -1.0 * shifted_rosenbrock(x1, x2)]
    sim.costs = [1000.0]

    ########################################
    sim.numerical = True
    sim.historical_nsample = 5
    sim.domain = [(-2.0, 2.0), (-2.0, 2.0)]
    sim.sample_n_from_domain = SAMPLE_DOMAIN
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 20
    sim.ramp_opt = None
    sim.parallel = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    # Functional forms of our mean and covariance
    sim.mean = constant_mean
    sim.cov = cov

    # Hyperparameters start unset (None); each bound is (lower, upper), where
    # the upper limit may be a callable of (X, Y) evaluated on the data.
    sim.theta.bounds = {}
    sim.theta.sig_1 = None
    sim.theta.bounds['sig_1'] = (1E-2, lambda _, Y: np.var(Y))
    sim.theta.l1 = None
    sim.theta.bounds['l1'] = (1E-1, 1)
    sim.theta.l2 = None
    sim.theta.bounds['l2'] = (1E-1, 1)

    # One rho entry, initialised to 1.0, per unordered pair of sources
    n_sources = len(sim.IS)
    rho_init = {}
    for i in range(n_sources):
        for j in range(i, n_sources):
            rho_init[str(sorted([i, j]))] = 1.0
    sim.theta.rho = rho_init

    # Diagonal and off-diagonal rho entries get different bounds
    for key in sim.theta.rho.keys():
        a, b = eval(key)
        if a == b:
            sim.theta.bounds['rho %s' % key] = (0.1, 1.0)
        else:
            sim.theta.bounds['rho %s' % key] = (0.01, 1.0 - 1E-6)

    sim.theta.set_hp_names()

    sim.update_hp_only_with_IS0 = False
    sim.update_hp_only_with_overlapped = False
    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False

    ###################################################################################################

    # Start simulation
    sim.iteration_kill_switch = 200
    sim.cost_kill_switch = 10000
    sim.run()
예제 #8
0
파일: run_misokg.py 프로젝트: ClancyLab/PAL
def run_misokg(run_index,
               sffx="misokg",
               scaled=False,
               loose=False,
               very_loose=False,
               use_MAP=False,
               upper=1.0,
               run_unitary=False,
               use_miso=False):
    """Set up and run a two-source optimisation of a shifted Rosenbrock function.

    An ``Optimizer`` is configured with two information sources (the negated
    Rosenbrock function offset by -456.3, and the same function with an extra
    ``0.1 * sin(10 * x1 + 5 * x2)`` term), the ``getNextSample_misokg``
    acquisition function and one of two covariance models; ``sim.run()`` is
    then called.

    Args:
        run_index: Integer substituted (``%d``) into the log file names.
        sffx: Suffix substituted into the log file names.
        scaled: Only read when ``use_miso`` is False.  When True, ``e1`` and
            ``e2`` start as ``None`` instead of 1.0 (presumably ``None``
            marks a hyperparameter that is to be fitted - confirm against
            ``Optimizer``).
        loose: Start ``rho`` with only the ``[0, 1]`` entry set to ``None``.
        very_loose: Start every ``rho`` entry as ``None``.  Checked before
            ``loose``.
        use_MAP: Use ``MAP`` rather than ``MLE`` as the hyperparameter
            objective.
        upper: Upper bound used for ``e1``, ``e2`` and every ``rho`` entry
            of the ``cov_bonilla`` model.
        run_unitary: Makes ``cov_bonilla`` use a fixed, nearly all-ones
            source matrix instead of one built from ``rho``.  It also
            selects an all-1.0 ``rho`` start when neither ``very_loose`` nor
            ``loose`` is set.
        use_miso: Use ``cov_miso`` instead of ``cov_bonilla``.

    Raises:
        Exception: If ``use_miso`` is False and none of ``very_loose``,
            ``loose`` or ``run_unitary`` is set (this includes a call that
            relies on the defaults only).
    """

    SAMPLE_DOMAIN = 1000

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    if use_MAP:
        sim.hyperparameter_objective = MAP
    else:
        sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = None
    sim.fname_historical = None

    # Start every run from an empty log file.
    sim.logger_fname = "data_dumps/%d_%s.log" % (run_index, sffx)
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = None
    sim.mu_fname = None
    sim.sig_fname = None
    sim.combos_fname = None
    sim.hp_fname = "data_dumps/%d_HP_%s.log" % (run_index, sffx)
    sim.acquisition_fname = None
    sim.save_extra_files = True

    # Information sources, in order from expensive to cheap
    def rosenbrock(x1, x2):
        return (1.0 - x1)**2 + 100.0 * (x2 - x1**2)**2 - 456.3

    # Both sources are negated; the second one adds a small sinusoidal term.
    sim.IS = [
        lambda x1, x2: -1.0 * rosenbrock(x1, x2),
        lambda x1, x2: -1.0 *
        (rosenbrock(x1, x2) + 0.1 * np.sin(10.0 * x1 + 5.0 * x2)),
    ]
    sim.costs = [1000.0, 1.0]

    ########################################
    sim.numerical = True
    sim.historical_nsample = 5
    sim.domain = [(-2.0, 2.0), (-2.0, 2.0)]
    sim.sample_n_from_domain = SAMPLE_DOMAIN
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 20
    sim.ramp_opt = None
    sim.parallel = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # The mean is the constant -456.3, one entry per element of Y.
    sim.mean = lambda X, Y, theta: np.array([-456.3 for _ in Y])

    def cov_miso(X0, Y, theta):
        # Column 0 of X0 is dropped before the kernel is evaluated
        # (presumably it holds the information-source index - confirm).
        Kx = squared(np.array(X0)[:, 1:], [theta.l1, theta.l2], theta.sig_1)
        Kx_l = squared(np.array(X0)[:, 1:], [theta.l3, theta.l4], theta.sig_2)
        # 2x2 block layout: only the lower-right block carries the second
        # kernel on top of the shared one.
        return np.block([[Kx, Kx], [Kx, Kx + Kx_l]])

    def cov_bonilla(X0, Y, theta):
        Kx = squared(np.array(X0)[:, 1:], [theta.l1, theta.l2], theta.sig_m)
        # Add a small constant to the diagonal.
        Kx = Kx + 1E-6 * np.eye(Kx.shape[0])

        if run_unitary:
            Ks = np.array([[1.0, 1.0 - 1E-6], [1.0 - 1E-6, 1.0]])
        else:
            # Lower-triangular matrix filled from rho; L.L^T is then
            # symmetric by construction.
            L = np.array([
                np.array([
                    theta.rho[str(sorted([i, j]))] if i >= j else 0.0
                    for j in range(theta.n_IS)
                ])
                for i in range(theta.n_IS)
            ])
            Ks = L.dot(L.T)

        # Scale the source matrix on both sides by diag(e1, e2).
        e = np.diag(np.array([theta.e1, theta.e2]))
        Ks = np.matmul(e, np.matmul(Ks, e))

        # Build the Kronecker product exactly once.
        return np.kron(Ks, Kx)

    if use_miso:
        sim.cov = cov_miso

        sim.theta.bounds = {}
        sim.theta.sig_1, sim.theta.bounds['sig_1'] = None, (
            1E-2, lambda _, Y: np.var(Y))
        sim.theta.sig_2, sim.theta.bounds['sig_2'] = None, (
            1E-2, lambda _, Y: np.var(Y))
        sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
        sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)
        sim.theta.l3, sim.theta.bounds['l3'] = None, (1E-1, 1)
        sim.theta.l4, sim.theta.bounds['l4'] = None, (1E-1, 1)

        # cov_miso never reads rho, so it is pinned to 1.0 with degenerate
        # bounds.
        sim.theta.rho = {"[0, 0]": 1.0, "[0, 1]": 1.0, "[1, 1]": 1.0}
        sim.theta.bounds['rho [0, 0]'] = (1.0, 1.0)
        sim.theta.bounds['rho [0, 1]'] = (1.0, 1.0)
        sim.theta.bounds['rho [1, 1]'] = (1.0, 1.0)
    else:
        sim.cov = cov_bonilla

        sim.theta.bounds = {}
        sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (
            1E-2, lambda _, Y: np.var(Y))
        sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
        sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

        if scaled:
            sim.theta.e1, sim.theta.bounds['e1'] = None, (1E-1, upper)
            sim.theta.e2, sim.theta.bounds['e2'] = None, (1E-1, upper)
        else:
            sim.theta.e1, sim.theta.bounds['e1'] = 1.0, (1E-1, upper)
            sim.theta.e2, sim.theta.bounds['e2'] = 1.0, (1E-1, upper)

        # NOTE! This is a reserved keyword in misoKG.  We will generate a list of the same length
        # of the information sources, and use this for scaling our IS.
        if very_loose:
            sim.theta.rho = {"[0, 0]": None, "[0, 1]": None, "[1, 1]": None}
        elif loose:
            sim.theta.rho = {"[0, 0]": 1.0, "[0, 1]": None, "[1, 1]": 1.0}
        elif run_unitary:
            sim.theta.rho = {"[0, 0]": 1.0, "[0, 1]": 1.0, "[1, 1]": 1.0}
        else:
            raise Exception("What is trying to be run?")
        sim.theta.bounds['rho [0, 0]'] = (0.1, upper)
        sim.theta.bounds['rho [0, 1]'] = (0.1, upper)
        sim.theta.bounds['rho [1, 1]'] = (0.01, upper)

    sim.theta.set_hp_names()

    sim.update_hp_only_with_IS0 = False
    sim.update_hp_only_with_overlapped = False
    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False

    ###################################################################################################

    # Start simulation
    sim.iteration_kill_switch = 200
    sim.cost_kill_switch = 10000
    sim.run()
예제 #9
0
def run_misokg(run_index, sffx="misokg", SAMPLE_DOMAIN=1000):
    """Set up and run a two-source Rosenbrock optimisation selected by ``sffx``.

    An ``Optimizer`` is configured with two information sources (the negated
    Rosenbrock function offset by -456.3, and the same function with an extra
    ``0.1 * sin(10 * x1 + 5 * x2)`` term) and the ``getNextSample_misokg``
    acquisition function; ``sim.run()`` is then called.

    Args:
        run_index: Integer substituted (``%d``) into the log file name.
        sffx: Selects the covariance variant and is also part of the log
            file name.  Accepted values:

            * ``"misokg"`` - use ``cov_miso``.
            * ``"bdpc"``   - ``cov_bonilla`` with the source matrix read
              directly from ``rho``; ``sim.dynamic_pc`` is set to True.
            * ``"bidpc"``  - as ``"bdpc"`` but with off-diagonal entries 1.0
              and diagonal entries ``rho["[0, i]"] ** -2``.
            * ``"bvl"``    - ``cov_bonilla`` with every ``rho`` entry
              starting as ``None`` and ``e1 = e2 = 1.0``.
            * ``"bsvl"``   - as ``"bvl"`` but ``e1``/``e2`` start as
              ``None``.
            * ``"bI"``     - ``cov_bonilla`` with an identity source matrix.
            * ``"bu"``     - ``cov_bonilla`` with a nearly all-ones source
              matrix.
        SAMPLE_DOMAIN: Assigned to ``sim.sample_n_from_domain`` and used to
            build the output folder name ``"RNS<SAMPLE_DOMAIN>"``.

    Raises:
        Exception: If ``sffx`` is not one of the values listed above.
    """
    # Function-local import: the rho keys are parsed without eval().
    import ast

    FOLDER = "RNS%d" % SAMPLE_DOMAIN

    # All mode flags start switched off; the branch matching sffx enables
    # the relevant ones.
    scaled = False
    dpc = False
    invert_dpc = False
    use_I = False
    use_J = False
    use_miso = False

    if sffx == "misokg":
        use_miso = True
    elif sffx == "bdpc":
        dpc = True
    elif sffx == "bidpc":
        dpc = True
        invert_dpc = True
    elif sffx == "bvl":
        pass
    elif sffx == "bsvl":
        scaled = True
    elif sffx == "bI":
        use_I = True
    elif sffx == "bu":
        use_J = True
    else:
        raise Exception("This sffx (%s) is not accounted for." % sffx)

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = None
    sim.fname_historical = None

    # Start every run from an empty log file.
    sim.logger_fname = "%s/%d_%s.log" % (FOLDER, run_index, sffx)
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = None
    sim.mu_fname = None
    sim.sig_fname = None
    sim.combos_fname = None
    sim.hp_fname = None
    sim.acquisition_fname = None
    sim.save_extra_files = True

    # Information sources, in order from expensive to cheap
    def rosenbrock(x1, x2):
        return (1.0 - x1)**2 + 100.0 * (x2 - x1**2)**2 - 456.3

    # Both sources are negated; the second one adds a small sinusoidal term.
    sim.IS = [
        lambda x1, x2: -1.0 * rosenbrock(x1, x2),
        lambda x1, x2: -1.0 *
        (rosenbrock(x1, x2) + 0.1 * np.sin(10.0 * x1 + 5.0 * x2)),
    ]
    sim.costs = [1000.0, 1.0]

    ########################################
    sim.numerical = True
    sim.historical_nsample = 5
    sim.domain = [(-2.0, 2.0), (-2.0, 2.0)]
    sim.sample_n_from_domain = SAMPLE_DOMAIN
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 20
    sim.ramp_opt = None
    sim.parallel = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # The mean is the constant -456.3, one entry per element of Y.
    sim.mean = lambda X, Y, theta: np.array([-456.3 for _ in Y])

    def cov_miso(X0, Y, theta):
        # Column 0 of X0 is dropped before the kernel is evaluated
        # (presumably it holds the information-source index - confirm).
        Kx = squared(np.array(X0)[:, 1:], [theta.l1, theta.l2], theta.sig_1)
        Kx_l = squared(np.array(X0)[:, 1:], [theta.l3, theta.l4], theta.sig_2)
        # 2x2 block layout: only the lower-right block carries the second
        # kernel on top of the shared one.
        return np.block([[Kx, Kx], [Kx, Kx + Kx_l]])

    def cov_bonilla(X0, Y, theta):
        Kx = squared(np.array(X0)[:, 1:], [theta.l1, theta.l2], theta.sig_1)
        # Add a small constant to the diagonal.
        Kx = Kx + 1E-6 * np.eye(Kx.shape[0])

        if use_J:
            # Ones on the diagonal, 1 - 1E-6 everywhere else.
            Ks = np.ones((theta.n_IS, theta.n_IS)) * (1.0 - 1E-6) + np.eye(
                theta.n_IS) * 1E-6
        elif use_I:
            Ks = np.eye(theta.n_IS)
        elif dpc and invert_dpc:
            # Off-diagonal entries are 1.0; diagonal entry i is
            # rho["[0, i]"] ** -2.
            Ks = np.array([
                np.array([
                    1.0 if i != j else theta.rho["[0, %d]" % i]**(-2.0)
                    for j in range(theta.n_IS)
                ]) for i in range(theta.n_IS)
            ])
        elif dpc:
            # Source matrix taken entry-by-entry from rho (symmetric because
            # the key is built from the sorted index pair).
            Ks = np.array([
                np.array([
                    theta.rho[str(sorted([i, j]))] for j in range(theta.n_IS)
                ]) for i in range(theta.n_IS)
            ])
        else:
            L = np.array([
                np.array([
                    theta.rho[str(sorted([i, j]))] if i >= j else 0.0
                    for j in range(theta.n_IS)
                ]) for i in range(theta.n_IS)
            ])
            # Force it to be positive semi-definite
            Ks = L.dot(L.T)
            # Only this branch applies the per-source scaling diag(e).
            if theta.n_IS == 2:
                e = np.diag(np.array([theta.e1, theta.e2]))
            elif theta.n_IS == 3:
                e = np.diag(np.array([theta.e1, theta.e2, theta.e3]))
            else:
                raise Exception("HOW?")
            Ks = e.dot(Ks.dot(e))

        return np.kron(Ks, Kx)

    # Hyperparameters shared by both covariance models.
    sim.theta.bounds = {}
    sim.theta.sig_1, sim.theta.bounds['sig_1'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    if use_miso:
        sim.cov = cov_miso
        sim.theta.sig_2, sim.theta.bounds['sig_2'] = None, (
            1E-2, lambda _, Y: np.var(Y))
        sim.theta.l3, sim.theta.bounds['l3'] = None, (1E-1, 1)
        sim.theta.l4, sim.theta.bounds['l4'] = None, (1E-1, 1)
        sim.theta.rho = {
            str(sorted([i, j])): 1.0
            for i in range(len(sim.IS)) for j in range(i, len(sim.IS))
        }
    else:
        sim.cov = cov_bonilla

        if scaled:
            sim.theta.e1, sim.theta.bounds['e1'] = None, (1E-1, 1.0)
            sim.theta.e2, sim.theta.bounds['e2'] = None, (1E-1, 1.0)
        else:
            sim.theta.e1, sim.theta.bounds['e1'] = 1.0, (1E-1, 1.0)
            sim.theta.e2, sim.theta.bounds['e2'] = 1.0, (1E-1, 1.0)

        sim.theta.rho = {"[0, 0]": None, "[0, 1]": None, "[1, 1]": None}
        if dpc or use_I or use_J:
            sim.theta.rho = {
                str(sorted([i, j])): 1.0
                for i in range(len(sim.IS)) for j in range(i, len(sim.IS))
            }
            sim.dynamic_pc = dpc

    # Keys look like "[i, j]"; diagonal and off-diagonal entries get
    # different bounds.
    for k in sim.theta.rho:
        a, b = ast.literal_eval(k)
        if a != b:
            sim.theta.bounds['rho %s' % k] = (0.01, 1.0 - 1E-6)
        else:
            sim.theta.bounds['rho %s' % k] = (0.1, 1.0)

    sim.theta.set_hp_names()

    sim.update_hp_only_with_IS0 = False
    sim.update_hp_only_with_overlapped = False
    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False

    ###################################################################################################

    # Start simulation
    sim.iteration_kill_switch = 200
    sim.cost_kill_switch = 10000
    sim.run()
예제 #10
0
def run_misokg(run_index):
    """Set up and run a two-source optimisation over pickled enthalpy tables.

    Two pickled lookup tables are loaded from the working directory and
    wrapped as negated information sources.  A random historical sample is
    drawn from the keys of the first table, evaluated on both sources and
    written to ``data_dumps/<run_index>.history``; the rows belonging to
    source 0 are additionally written to
    ``data_dumps/<run_index>_reduced.history``.  An ``Optimizer`` is then
    configured with ``cov_new`` / ``getNextSample_misokg`` and run.

    Args:
        run_index: Integer substituted (``%d``) into every output file name.
    """

    # Store data for debugging
    # NOTE(review): pickle.load must only be used on trusted files.
    with open("enthalpy_N1_R3_Ukcal-mol", 'rb') as fptr:
        IS0 = pickle.load(fptr)
    with open("enthalpy_N1_R2_Ukcal-mol", 'rb') as fptr:
        IS1 = pickle.load(fptr)

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_misokg.dat"
    sim.fname_historical = None

    # Information sources, in order from expensive to cheap
    # Each source looks up the key "<joined h> <c> <s>" and negates the value.
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
        lambda h, c, s: -1.0 * IS1[' '.join([''.join(h), c, s])]
    ]
    sim.costs = [
        1.0,
        0.1,
    ]

    # Start every run from an empty log file.
    sim.logger_fname = "data_dumps/%d_misokg.log" % run_index
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = "data_dumps/%d_misokg.dat" % run_index
    sim.mu_fname = "data_dumps/%d_mu_misokg.dat" % run_index
    sim.sig_fname = "data_dumps/%d_sig_misokg.dat" % run_index
    sim.combos_fname = "data_dumps/%d_combos_misokg.dat" % run_index
    sim.hp_fname = "data_dumps/%d_hp_misokg.dat" % run_index
    sim.acquisition_fname = "data_dumps/%d_acq_misokg.dat" % run_index
    sim.save_extra_files = True
    ########################################
    # Override the possible combinations with the reduced list of IS0
    # Because we do this, we should also generate our own historical sample
    # Each IS0 key is split on whitespace into three fields, which are
    # rearranged into "<field1>Pb<field0>_<field2>".
    combos_no_IS = [
        k[1] + "Pb" + k[0] + "_" + k[2]
        for k in [key.split() for key in IS0.keys()]
    ]
    sim.historical_nsample = 10
    choices = np.random.choice(combos_no_IS,
                               sim.historical_nsample,
                               replace=False)
    tmp_data = pal_strings.alphaToNum(choices,
                                      solvents,
                                      mixed_halides=True,
                                      name_has_IS=False)

    # One row per (source, sample): [source index] + numeric encoding + [value].
    data = []
    for is_index, source in enumerate(sim.IS):
        for d in tmp_data:
            name = pal_strings.parseNum(d,
                                        solvents,
                                        mixed_halides=True,
                                        num_has_IS=False)
            h, c, _, s, _ = pal_strings.parseName(name, name_has_IS=False)
            c = c[0]
            data.append([is_index] + d + [source(h, c, s)])

    # Close both files explicitly so their contents are flushed to disk
    # before anything else tries to read them.
    sim.fname_historical = "data_dumps/%d.history" % run_index
    with open(sim.fname_historical, 'wb') as fptr:
        pickle.dump(data, fptr)
    simple_data = [d for d in data if d[0] == 0]
    with open("data_dumps/%d_reduced.history" % run_index, 'wb') as fptr:
        pickle.dump(simple_data, fptr)

    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 10
    sim.ramp_opt = None
    sim.parallel = False

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # MEAN: 4 * mu_alpha + mu_zeta
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)

    # 4.0 when mixed halides are enabled, 2.0 otherwise.
    SCALE = [2.0, 4.0][int(sim.mixed_halides)]

    def mean(X, Y, theta):
        # Constant mean, one entry per element of Y.
        mu = np.array([SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])
        return mu

    sim.mean = mean

    # cov_old and cov_old2 are not assigned to sim.cov below; they are kept
    # as alternative kernels that scale element-wise by theta.rho_matrix.
    def cov_old(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3],
            np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X) * (A + B + C + D)

    def cov_old2(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3],
            np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X, use_psd=True) * (A + B + C + D)

    def cov_new(X, Y, theta):
        # Get a list of all unique X, removing initial IS identifier
        X0 = []
        for x in X:
            if not any(
                [all([a == b for a, b in zip(x[1:], xchk)]) for xchk in X0]):
                X0.append(x[1:])

        # Linear term on all but the last three columns, a diagonal term, a
        # constant offset, and an mk52 kernel on columns -3 and -2.
        A = theta.sig_alpha * np.dot(
            np.array(X0)[:, :-3],
            np.array(X0)[:, :-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X0)))
        C = theta.sig_zeta
        D = mk52(np.array(X0)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        Kx = A + B + C + D

        L = np.array([
            np.array([
                theta.rho[str(sorted([i, j]))] if i >= j else 0.0
                for j in range(theta.n_IS)
            ]) for i in range(theta.n_IS)
        ])
        # Normalize L to stop over-scaling values small
        L = L / np.linalg.norm(L)
        # Force it to be positive semi-definite
        Ks = L.dot(L.T)

        # The full Kronecker product over every (source, unique x) pair is
        # returned; no sub-selection by the sampled rows of X is performed.
        return np.kron(Ks, Kx)

    sim.cov = cov_new

    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    # NOTE! This is a reserved keyword in misoKG.  We will generate a list of the same length
    # of the information sources, and use this for scaling our IS.
    sim.theta.rho = {"[0, 0]": None, "[0, 1]": None, "[1, 1]": None}
    sim.theta.bounds['rho [0, 0]'] = (0.1, 1.0)
    sim.theta.bounds['rho [0, 1]'] = (0.1, 1.0)
    sim.theta.bounds['rho [1, 1]'] = (0.1, 1.0)

    sim.theta.set_hp_names()

    sim.primary_rho_opt = False
    sim.update_hp_only_with_overlapped = True

    ###################################################################################################

    # Start simulation
    sim.run()