def get_combos_pure_solvent(self, A=None, B=None, X=None, S=None):
    '''
    This function will return a reduced list of unique possible ABX3
    combinations, given a list of possible A-site, B-site, and X-site
    atoms (as well as solvents).  Note, this will only work for a pure
    solvent system.
    '''
    if A is None:
        A = self.A
    if B is None:
        B = self.B
    if X is None:
        X = self.X
    if S is None:
        S = self.S

    # Error handling
    for s, x in zip(["A", "B", "X", "S"], [A, B, X, S]):
        assert isinstance(x, list), "%s is not a list in get_combos_pure." % s

    if self.mixed_halides:
        combos = [
            "".join(list(x[:2]) + list(sorted(x[2:5])) + list(x[5:]))
            for x in itertools.product(A, B, X, X, X, ["_"], S)
        ]
    else:
        combos = [
            "".join(list(x[:2]) + [x[2]] * 3 + list(x[3:]))
            for x in itertools.product(A, B, X, ["_"], S)
        ]
    combos = sorted(geometry.reduce_list(combos))

    final_combos = []
    for i in range(len(self.IS)):
        for combo in combos:
            final_combos.append(combo + "_" + str(i))

    self.combinations = final_combos
    self.all_X = pal_strings.alphaToNum(
        self.combinations, solvents, mixed_halides=self.mixed_halides)
    self.all_solvent_properties = np.array(self.all_X)[:, -3:-1]
    self.all_Y = np.array([0 for i in range(len(self.all_X))])

    return final_combos
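# A minimal usage sketch (hedged; "opt" stands for an already-constructed
# Optimizer-like object, and the composition lists are illustrative):
#
#     opt.A, opt.B, opt.X, opt.S = ["FA"], ["Pb"], ["Br", "Cl"], ["THTO"]
#     opt.mixed_halides = True
#     combos = opt.get_combos_pure_solvent()
#
# Each returned string encodes A-site + B-site + sorted halides + solvent +
# the information-source index, e.g. "FAPbBrBrCl_THTO_0".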
def run(self):
    '''
    Run misoKG.
    '''
    if self.save_extra_files and self.overwrite:
        os.system("rm %s %s" % (self.mu_fname, self.sig_fname))

    # Error Handling
    assert self.costs is not None, "Error - You must specify costs before running!"
    assert len(self.costs) == len(self.IS), \
        "Error - You must specify the same number of information sources as costs!"

    if self.verbose:
        print("\n-------------------------------------------------------------------------------------")
        print("Beginning optimization.")
        # print("\tParallel = %s" % str(self.parallel))
        print(" Number of Information Sources = %d" % len(self.IS))
        print(" Acquisition = "),
        if self.acquisition == getNextSample_misokg:
            print("misoKG with a cost list = %s" % str(self.costs))
        elif self.acquisition == getNextSample_EI:
            print("EI")
        elif self.acquisition == getNextSample_kg:
            print("KG")
        else:
            print("Custom!")

        if self.hyperparameter_objective == MLE:
            obj_name = "MLE"
        elif self.hyperparameter_objective == MAP:
            obj_name = "MAP"
        else:
            obj_name = "Custom"
        if self.loglike == gaussian_loglike:
            loglike_name = "Gaussian"
        elif self.loglike == bonilla_loglike:
            loglike_name = "Bonilla"
        else:
            loglike_name = "Custom"
        print("The Hyperparameter Objective is %s with %d starting samples."
              % (obj_name, self.n_start))
        print("The loglikelihood method is %s." % loglike_name)
        if self.dynamic_pc:
            print("Will use a dynamic pearson correlation coefficient for rho.")
        print("Will optimize the following parameters:")
        print(" " + ', '.join(self.theta.hp_names))
        print("-------------------------------------------------------------------------------------")

    # Start - TIMER
    self.t0 = time.time()

    # Step 1 - Ensure we have our historical training set.  If not, then
    # generate one.
    if self.fname_historical is None:
        self.fname_historical = "historical.dat"
        if self.numerical:
            self.sample_numerical()
        else:
            self.sample()
    else:
        self.historical = pickle.load(open(self.fname_historical, 'r'))
        if self.numerical:
            if len(self.historical[0]) != len(self.domain) + 1:
                raise Exception(
                    "The historical data seems to be incorrect for misoKG. Maybe the IS associated with each point was not included?")
        else:
            if len(self.historical[0]) not in [10, 17]:
                raise Exception(
                    "The historical data seems to be incorrect for misoKG. Maybe the IS associated with each point was not included?")

    # Step 2 - Generate a full list of our sample space if it has not been given
    if self.mixed_solvents and not self.numerical:
        raise Exception("Mixed Solvents have not been implemented properly.")
    else:
        if self.combinations is None and not self.numerical:
            self.combinations = self.get_combos_pure_solvent()
        if self.all_X is None and not self.numerical:
            self.all_X = pal_strings.alphaToNum(
                self.combinations, solvents, mixed_halides=self.mixed_halides)
            self.all_solvent_properties = np.array(self.all_X)[:, -3:-1]
            self.all_Y = np.array([0 for i in range(len(self.all_X))])

    # Step 2.5 - Store our X and Y points
    if not self.numerical:
        self.assign_samples()

    # Store a list of samples that have been sampled at all information sources
    self.indices_overlap = range(len(self.sampled_X))

    # Step 3 - Get our hyperparameters.  As we don't have initial ones,
    # don't use_theta for this instance.
    self.updateHPs(use_theta=False)

    # Step 3.5 - Initialize indices_overlap variables
    self.indices_overlap_len = len(self.indices_overlap)
    self.indices_overlap_changed = False

    # Step 4 - Update the posterior based on the historical data.
    self.updatePosterior()

    if not self.numerical:
        # Save combinations and default save actions
        if self.combos_fname is not None:
            fptr = open(self.combos_fname, 'w')
            for i, c in enumerate(self.combinations):
                fptr.write("%d\t%s\n" % (i, c))
            fptr.close()
        self.save()

        # Step 5 - Begin the main loop
        start, stop = len(self.sampled_X), len(self.combinations)
    else:
        start, stop = len(self.sampled_X), len(self.all_X)

    best_found_in = start
    best_value = max(
        np.array(self.sampled_objectives)[self._get_info_source_map(
            self.sampled_X)[0]])
    best_index = self.sampled_indices[self.sampled_objectives.index(best_value)]
    best_name = self.combinations[best_index]

    # Initialize our costs based on what has been sampled so far
    self.total_cost = sum([self.costs[int(x[0])] for x in self.sampled_X])

    best_prediction = max(
        np.array(self.mu)[self._get_info_source_map(self.all_X)[0]])
    best_prediction = list(
        np.array(self.mu)[self._get_info_source_map(
            self.all_X)[0]]).index(best_prediction)
    best_prediction = self._get_info_source_map(self.all_X)[0][best_prediction]
    recommendation = self.combinations[best_prediction]

    if self.save_extra_files and self.sample_fname is not None:
        fptr = open(self.sample_fname, 'a')
        for v in zip(self.sampled_names, self.sampled_objectives):
            fptr.write("%s\t%.4f\n" % v)

    # Begin the main loop
    fully_sampled = False
    recommendation_kill_flag = False
    iteration_kill_flag = False
    cost_kill_flag = False
    for index in range(start, stop):
        if self.iteration_kill_switch is not None and index >= self.iteration_kill_switch:
            iteration_kill_flag = True
            break

        # If we have sampled all of IS0, and noise doesn't exist, then we
        # gracefully exit
        if not self.noise and all([
                i_IS0 in self.sampled_indices
                for i_IS0 in self._get_info_source_map(self.all_X)[0]
        ]):
            fully_sampled = True
            break

        # Step 6 - Acquisition function.  Decide on the next point(s) to sample.
        next_point = self.acquisition(
            self.mu,
            # self.theta.rho_matrix(self.all_X) * self.K,
            self.K,
            max(self.sampled_objectives),
            len(self.combinations),
            self.costs,
            self.all_X,
            self.sampled_indices,
            save=self.acquisition_fname)

        if next_point in self.sampled_indices:
            print("\nFAILURE!!!! SAMPLED # %s - Index = %d# POINT TWICE!\n"
                  % (self.combinations[next_point], next_point))
            print("K Diagonal = %s"
                  % ' '.join(["%f" % v for v in np.diag(self.K)]))
            print("K[%d] = %s" % (next_point, ' '.join(
                ["%f" % v for v in self.K[next_point]])))
            print("Sampled Points = %s" % str(self.sampled_indices))
            raise Exception(
                "Error - acquisition function grabbed an already sampled point!")

        if self.verbose:
            r = -1.23
            if "[0, 1]" in self.theta.rho:
                r = self.theta.rho["[0, 1]"]
            suffix = "(iter %d) %s = %.4f, sampling %s. Recommendation = %s, Current Cost = %.2f, Rho = %.3f" % (
                best_found_in, best_name, best_value,
                self.combinations[next_point], recommendation,
                self.total_cost, r)
            ppb(index, stop, prefix='Running', suffix=suffix, pad=True)
            if self.logger_fname is not None:
                fptr = open(self.logger_fname, 'a')
                fptr.write(suffix + "\n")
                fptr.close()

        if self.recommendation_kill_switch is not None and recommendation == self.recommendation_kill_switch:
            recommendation_kill_flag = True
            break

        # Step 7 - Sample point(s)
        self.sampled_indices.append(next_point)
        self.sampled_names.append(self.combinations[next_point])
        if not self.numerical:
            self.sampled_X.append(
                pal_strings.alphaToNum(
                    self.sampled_names[-1],
                    solvents,
                    mixed_halides=self.mixed_halides)[0])
            h, c, _, s, info_lvl = pal_strings.parseName(self.sampled_names[-1])
            self.sampled_objectives.append(self.IS[info_lvl](h, c[0], s))
        else:
            x = self.all_X[next_point]
            self.sampled_X.append(x)
            info_lvl = int(x[0])
            self.sampled_objectives.append(self.IS[info_lvl](*x[1:]))

        if self.save_extra_files and self.sample_fname is not None:
            fptr = open(self.sample_fname, 'a')
            fptr.write("%s\t%.4f\n"
                       % (self.sampled_names[-1], self.sampled_objectives[-1]))

        # Ensure we get an array of all sampled indices that have been sampled
        # at ALL information source levels
        chk = self.sampled_X[-1][1:]
        if self.numerical:
            found = [
                i for i, v in enumerate(self.sampled_X) if all(chk == v[1:])
            ]
        else:
            found = [i for i, v in enumerate(self.sampled_X) if chk == v[1:]]
        # Assume we have 4 IS.  If we find 4 of chk, then we have now fully
        # sampled chk.
        if len(found) == len(self.IS):
            for f in found:
                if f not in self.indices_overlap:
                    self.indices_overlap.append(f)
        self.indices_overlap_changed = self.indices_overlap_len != len(
            self.indices_overlap)
        self.indices_overlap_len = len(self.indices_overlap)

        # Step 7.5 - Maybe re-opt the hyperparameters.
        # Note, we do this in a two step approach.  First, we optimize all HPs
        # based on only data points that exist at all levels of theory.  Then
        # we optimize only at the highest level of theory sampled so far (IS0).
        if index != start and (self.reopt is not None and index % self.reopt == 0) or (
                self.ramp_opt is not None and index < self.ramp_opt):
            self.updateHPs()
            # Step 8a - Update the posterior completely if we are
            # reoptimizing the HPs
            self.updatePosterior()
        else:
            # Step 8b - Update the posterior with only the newest sampled point
            self.updatePosterior(
                (self.sampled_indices[-1], self.sampled_objectives[-1]))

        self.save()

        # Count the cost of this iteration
        self.total_cost += self.costs[info_lvl]
        if self.cost_kill_switch is not None and self.total_cost > self.cost_kill_switch:
            cost_kill_flag = True
            break

        # Get our recommendation from max(mu) for only IS0
        best_prediction = max(
            np.array(self.mu)[self._get_info_source_map(self.all_X)[0]])
        best_prediction = list(
            np.array(self.mu)[self._get_info_source_map(
                self.all_X)[0]]).index(best_prediction)
        best_prediction = self._get_info_source_map(
            self.all_X)[0][best_prediction]
        recommendation = self.combinations[best_prediction]

        # Get the best sampled so far
        potential_best = max(
            np.array(self.sampled_objectives)[self._get_info_source_map(
                self.sampled_X)[0]])
        if potential_best > best_value:
            best_found_in = index
            best_value = potential_best
            best_index = self.sampled_indices[
                self.sampled_objectives.index(best_value)]
            best_name = self.combinations[best_index]

    # END TIMER
    self.t1 = time.time()

    if self.verbose:
        print("-----------------------")
        print("PAL Optimizer has completed in %.2f s" % (self.t1 - self.t0))
        if fully_sampled:
            print("Optimizer quit early as IS0 was fully sampled")
        if recommendation_kill_flag:
            print("Optimizer quit early due to recommendation of %s."
                  % self.recommendation_kill_switch)
        if iteration_kill_flag:
            print("Optimizer quit early due to exceeding %d iterations."
                  % self.iteration_kill_switch)
        if cost_kill_flag:
            print("Optimizer quit early due to exceeding %.4f cost."
                  % self.cost_kill_switch)
        print("-----------------------")
        print("Best combination: %s" % best_name)
        print(" Objective: %.4f" % best_value)
        print(" Maximized: %d" % best_found_in)
        print("-----------------------")
        print(self.theta)
        print("-------------------------------------------------------------------------------------\n")
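# A standalone sketch of the Step 7.5 re-optimization schedule above (the
# helper name is illustrative and unused by the class).  It mirrors the
# condition in run(), including Python's `and`/`or` precedence, under which
# the ramp_opt branch fires even when index == start.
def _reopt_schedule_sketch(index, start, reopt=20, ramp_opt=None):
    return (index != start and
            (reopt is not None and index % reopt == 0)) or (
                ramp_opt is not None and index < ramp_opt)

# e.g. with reopt = 20 and ramp_opt = None:
# [i for i in range(10, 100) if _reopt_schedule_sketch(i, 10)] -> [20, 40, 60, 80]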
def sample(self, specify=None, debug=False, MAX_LOOP=10, allow_reduced=False):
    '''
    This function will run, in parallel, N_samples of the objective
    functions for historical data generation.  Note, these are run for
    EVERY information source.
    '''
    if debug:
        print("Collecting LHS samples...")

    if specify is None:
        counter, samples = 0, []
        while len(samples) != self.historical_nsample and counter < MAX_LOOP:
            # Grab a latin hypercube sample
            samples = doe_lhs.lhs(
                int(self.mixed_halides) * 2 + 2, self.historical_nsample)

            # Round the LHS and figure out the samples
            solvent_ranges = [
                i * 1.0 / len(self.S)
                for i in range(1, len(self.solvents) + 1)
            ]
            solv = lambda v: self.S[[v <= s for s in solvent_ranges].index(True)]
            trio = lambda v: [
                int(v > (chk - 1.0 / 3.0) and v <= chk)
                for chk in [1. / 3., 2. / 3., 1.0]
            ]

            # Grab our samples
            if self.mixed_halides:
                halides = [sorted([s[0], s[1], s[2]]) for s in samples]
                halides = [[trio(h) for h in hh] for hh in halides]
                samples = [
                    h[0] + h[1] + h[2] + trio(s[3]) + [
                        self.solvents[solv(s[-1])]["density"],
                        self.solvents[solv(s[-1])]["dielectric"],
                        self.solvents[solv(s[-1])]["index"]
                    ] for h, s in zip(halides, samples)
                ]
            else:
                samples = [
                    trio(s[0]) + trio(s[1]) + [
                        self.solvents[solv(s[-1])]["density"],
                        self.solvents[solv(s[-1])]["dielectric"],
                        self.solvents[solv(s[-1])]["index"]
                    ] for s in samples
                ]

            # Ensure no duplicates
            samples = sorted(samples, key=lambda x: x[-1])
            samples = [tuple(s) for s in samples]
            samples = [list(s) for s in set(samples)]

            counter += 1
    else:
        if isinstance(specify, int):
            specify = [specify]
        self.historical_nsample = len(specify)
        samples = [self.combinations[i] for i in specify]
        samples = pal_strings.alphaToNum(
            samples,
            solvents,
            mixed_halides=self.mixed_halides,
            name_has_IS=True)
        # Remove the IS label from the descriptor
        samples = [s[1:] for s in samples]

    if allow_reduced:
        print("Warning - Will sample from subspace due to duplicates (%d instead of %d)."
              % (len(samples), self.historical_nsample))
        self.historical_nsample = len(samples)
    elif specify is None:
        assert counter < MAX_LOOP, "Error - Unable to sample from space without duplicates!"

    if debug:
        print("Will sample %s" % str(samples))

    # Now, run these simulations to get the sample points
    jobs = []
    for i, sample in enumerate(samples):
        if debug:
            print("Running %s..." % sample)
        s = pal_strings.parseNum(
            sample,
            self.solvents,
            mixed_halides=self.mixed_halides,
            num_has_IS=False)
        hat, cat, _, solv, _ = pal_strings.parseName(s, name_has_IS=False)
        cat = cat[0]
        if not self.mixed_halides:
            hat = hat[0]

        if debug:
            print("\tAdding %s to sample runs..." % s)

        for j, obj in enumerate(self.IS):
            jobs.append([[j] + copy.deepcopy(sample), obj(hat, cat, solv)])

    # Now, get results from each simulation
    samples = []
    for sample, j in jobs:
        if not isinstance(j, float):
            j.wait()
            samples.append(sample + [j.get_result()])
        # In special situations, when we are reading from a list for example,
        # we don't need to worry about a job object, and can just assign the
        # value directly.
        else:
            samples.append(sample + [j])
        s = pal_strings.parseNum(
            samples[-1][:-1],
            self.solvents,
            mixed_halides=self.mixed_halides,
            num_has_IS=True)
        if debug:
            print("\t%s was found as %lg" % (s, samples[-1][-1]))

    # Save the sampled data
    fptr = open(self.fname_historical, "w")
    pickle.dump(samples, fptr)
    fptr.close()

    self.historical = samples

    if debug:
        print("Done Collecting Samples\n")
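# A minimal sketch of the LHS discretization used in sample() (hedged; it
# assumes a pyDOE-style doe_lhs.lhs(n_factors, n_samples) returning points in
# [0, 1], with np.random.rand standing in so the sketch is self-contained):
def _lhs_binning_sketch(n_samples=5):
    pts = np.random.rand(n_samples, 2)  # stand-in for doe_lhs.lhs(2, n_samples)
    # trio() snaps a unit coordinate to a one-hot vector over three equal
    # bins, exactly as in sample() above.
    trio = lambda v: [int(v > (chk - 1.0 / 3.0) and v <= chk)
                      for chk in [1. / 3., 2. / 3., 1.0]]
    return [trio(p[0]) + trio(p[1]) for p in pts]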
def run_misokg(run_index):
    # Store data for debugging
    IS0 = pickle.load(open("enthalpy_N1_R3_Ukcal-mol", 'r'))
    IS1 = pickle.load(open("enthalpy_N1_R2_Ukcal-mol", 'r'))

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    # sim.hyperparameter_objective = MAP
    sim.hyperparameter_objective = MLE

    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_misokg.dat"
    sim.fname_historical = None

    # Information sources, in order from expensive to cheap
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
        lambda h, c, s: -1.0 * IS1[' '.join([''.join(h), c, s])]
    ]
    sim.costs = [1.0, 0.1]

    sim.logger_fname = "data_dumps/%d_misokg.log" % run_index
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = "data_dumps/%d_misokg.dat" % run_index
    sim.mu_fname = "data_dumps/%d_mu_misokg.dat" % run_index
    sim.sig_fname = "data_dumps/%d_sig_misokg.dat" % run_index
    sim.combos_fname = "data_dumps/%d_combos_misokg.dat" % run_index
    sim.hp_fname = "data_dumps/%d_hp_misokg.dat" % run_index
    sim.acquisition_fname = "data_dumps/%d_acq_misokg.dat" % run_index
    sim.save_extra_files = True

    ########################################
    # Override the possible combinations with the reduced list of IS0.
    # Because we do this, we should also generate our own historical sample.
    combos_no_IS = [
        k[1] + "Pb" + k[0] + "_" + k[2]
        for k in [key.split() for key in IS0.keys()]
    ]
    sim.historical_nsample = 10
    choices = np.random.choice(combos_no_IS,
                               sim.historical_nsample,
                               replace=False)
    tmp_data = pal_strings.alphaToNum(choices,
                                      solvents,
                                      mixed_halides=True,
                                      name_has_IS=False)
    data = []
    for IS in range(len(sim.IS)):
        for i, d in enumerate(tmp_data):
            h, c, _, s, _ = pal_strings.parseName(pal_strings.parseNum(
                d, solvents, mixed_halides=True, num_has_IS=False),
                name_has_IS=False)
            c = c[0]
            data.append([IS] + d + [sim.IS[IS](h, c, s)])

    sim.fname_historical = "data_dumps/%d.history" % run_index
    pickle.dump(data, open(sim.fname_historical, 'w'))
    simple_data = [d for d in data if d[0] == 0]
    pickle.dump(simple_data,
                open("data_dumps/%d_reduced.history" % run_index, 'w'))
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 20
    sim.ramp_opt = None
    sim.parallel = False

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # MEAN: 4 * mu_alpha + mu_zeta
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)
    SCALE = [2.0, 4.0][int(sim.mixed_halides)]

    # _1, _2, _3 used as dummy entries
    def mean(X, Y, theta):
        mu = np.array([SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])
        return mu

    sim.mean = mean

    def cov_old(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3], np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X) * (A + B + C + D)

    def cov(X0, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X0)[:, :-3], np.array(X0)[:, :-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X0)))
        C = theta.sig_zeta
        D = mk52(np.array(X0)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        Kx = A + B + C + D
        Ks = np.array([
            np.array(
                [theta.rho[str(sorted([i, j]))] for j in range(theta.n_IS)])
            for i in range(theta.n_IS)
        ])
        if theta.normalize_Ks:
            Ks = Ks / np.linalg.norm(Ks)
        e = np.diag(np.array([theta.e1, theta.e2]))
        Ks = e.dot(Ks.dot(e))
        return np.kron(Ks, Kx)

    sim.cov = cov

    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (
        1E-2, lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)
    sim.theta.e1, sim.theta.bounds['e1'] = None, (1E-1, 1.0)
    sim.theta.e2, sim.theta.bounds['e2'] = None, (1E-1, 1.0)

    # NOTE! This is a reserved keyword in misoKG.  We will generate a list of
    # the same length as the information sources, and use this for scaling our IS.
    sim.theta.rho = {"[0, 0]": 1.0, "[0, 1]": 0.96, "[1, 1]": 1.0}
    sim.theta.bounds['rho [0, 0]'] = (0.1, 1.0)
    sim.theta.bounds['rho [0, 1]'] = (0.1, 1.0)
    sim.theta.bounds['rho [1, 1]'] = (0.1, 1.0)

    sim.theta.set_hp_names()

    sim.primary_rho_opt = False
    sim.update_hp_only_with_IS0 = False
    sim.update_hp_only_with_overlapped = False

    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False

    # This was a test feature that actually over-wrote rho to be PSD
    # sim.force_rho_psd = True

    sim.recommendation_kill_switch = "FAPbBrBrCl_THTO_0"

    ###################################################################################################
    # Start simulation
    sim.run()
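# A small demonstration of the covariance structure cov() returns above:
# np.kron tiles the n_IS x n_IS source matrix Ks over the design covariance
# Kx, so K[(l * n_x + i), (m * n_x + j)] == Ks[l, m] * Kx[i, j].  The values
# below are toy numbers (0.96 echoes the rho "[0, 1]" default set above); the
# helper name is illustrative only.
def _kron_structure_sketch():
    Ks = np.array([[1.0, 0.96], [0.96, 1.0]])  # source-to-source correlation
    Kx = np.array([[1.0, 0.5], [0.5, 1.0]])    # toy design covariance
    K = np.kron(Ks, Kx)                        # shape (4, 4)
    assert K[0, 2] == Ks[0, 1] * Kx[0, 0]      # cross-IS block scaling
    return K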
    1.0
]
sim.obj_vs_cost_fname = "obj_vs_cost_misokg.dat"
sim.save_extra_files = True
# sim.historical_nsample = 10

########################################
# Override the possible combinations with the reduced list of IS0
sim.combinations = [
    k[1] + "Pb" + k[0] + "_" + k[2] + "_" + str(IS)
    for k in [key.split() for key in IS0.keys()]
    for IS in range(len(sim.IS))
]
combos_no_IS = [
    k[1] + "Pb" + k[0] + "_" + k[2]
    for k in [key.split() for key in IS0.keys()]
]

# Because we do this, we should also generate our own historical sample
sim.historical_nsample = len(combos_no_IS)
choices = combos_no_IS
tmp_data = pal_strings.alphaToNum(
    choices, solvents, mixed_halides=True, name_has_IS=False)
data = []
for IS in range(len(sim.IS)):
    for i, d in enumerate(tmp_data):
        h, c, _, s, _ = pal_strings.parseName(
            pal_strings.parseNum(d, solvents, mixed_halides=True,
                                 num_has_IS=False),
            name_has_IS=False)
        c = c[0]
        data.append([IS] + d + [sim.IS[IS](h, c, s)])

IS0 = np.array([x[-1] for x in data if x[0] == 0])
IS1 = np.array([x[-1] * 1.8 for x in data if x[0] == 1])
IS0, IS1 = zip(*sorted(zip(IS0, IS1)))
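# Hedged aside: run() reports that the dynamic_pc option bases rho on a
# Pearson correlation coefficient; the same statistic for the two sources
# tabulated above can be inspected directly with numpy's corrcoef.
rho_01 = np.corrcoef(IS0, IS1)[0, 1]
print("Pearson rho between IS0 and IS1 = %.3f" % rho_01)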
def run_misokg(run_index):
    # Store data for debugging
    IS0 = pickle.load(open("enthalpy_N1_R3_Ukcal-mol", 'r'))
    IS1 = pickle.load(open("enthalpy_N1_R2_Ukcal-mol", 'r'))

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    # sim.hyperparameter_objective = MAP
    sim.hyperparameter_objective = MLE

    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_misokg.dat"
    sim.fname_historical = None

    # Information sources, in order from expensive to cheap
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
        lambda h, c, s: -1.0 * IS1[' '.join([''.join(h), c, s])]
    ]
    sim.costs = [
        1.0,
        0.1,
    ]

    sim.logger_fname = "data_dumps/%d_misokg.log" % run_index
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = "data_dumps/%d_misokg.dat" % run_index
    sim.mu_fname = "data_dumps/%d_mu_misokg.dat" % run_index
    sim.sig_fname = "data_dumps/%d_sig_misokg.dat" % run_index
    sim.combos_fname = "data_dumps/%d_combos_misokg.dat" % run_index
    sim.hp_fname = "data_dumps/%d_hp_misokg.dat" % run_index
    sim.acquisition_fname = "data_dumps/%d_acq_misokg.dat" % run_index
    sim.save_extra_files = True

    ########################################
    # Override the possible combinations with the reduced list of IS0.
    # Because we do this, we should also generate our own historical sample.
    combos_no_IS = [
        k[1] + "Pb" + k[0] + "_" + k[2]
        for k in [key.split() for key in IS0.keys()]
    ]
    # sim.historical_nsample = 240
    sim.historical_nsample = 10
    choices = np.random.choice(combos_no_IS,
                               sim.historical_nsample,
                               replace=False)
    tmp_data = pal_strings.alphaToNum(choices,
                                      solvents,
                                      mixed_halides=True,
                                      name_has_IS=False)
    data = []
    for IS in range(len(sim.IS)):
        for i, d in enumerate(tmp_data):
            h, c, _, s, _ = pal_strings.parseName(pal_strings.parseNum(
                d, solvents, mixed_halides=True, num_has_IS=False),
                name_has_IS=False)
            c = c[0]
            data.append([IS] + d + [sim.IS[IS](h, c, s)])

    sim.fname_historical = "data_dumps/%d.history" % run_index
    pickle.dump(data, open(sim.fname_historical, 'w'))
    simple_data = [d for d in data if d[0] == 0]
    pickle.dump(simple_data,
                open("data_dumps/%d_reduced.history" % run_index, 'w'))
    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 10
    sim.ramp_opt = None
    sim.parallel = False

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, warning, else Error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # MEAN: 4 * mu_alpha + mu_zeta
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)
    SCALE = [2.0, 4.0][int(sim.mixed_halides)]

    # _1, _2, _3 used as dummy entries
    def mean(X, Y, theta):
        mu = np.array([SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])
        return mu

    sim.mean = mean

    def cov_old(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3], np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X) * (A + B + C + D)

    def cov_old2(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3], np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X, use_psd=True) * (A + B + C + D)

    def cov_new(X, Y, theta):
        # Get a list of all unique X, removing the initial IS identifier
        X0 = []
        for x in X:
            if not any([all([a == b for a, b in zip(x[1:], xchk)])
                        for xchk in X0]):
                X0.append(x[1:])

        A = theta.sig_alpha * np.dot(
            np.array(X0)[:, :-3], np.array(X0)[:, :-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X0)))
        C = theta.sig_zeta
        D = mk52(np.array(X0)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        Kx = A + B + C + D

        L = np.array([
            np.array([
                theta.rho[str(sorted([i, j]))] if i >= j else 0.0
                for j in range(theta.n_IS)
            ]) for i in range(theta.n_IS)
        ])
        # Normalize L to avoid over-scaling
        L = L / np.linalg.norm(L)
        # Force it to be positive semi-definite
        Ks = L.dot(L.T)

        return np.kron(Ks, Kx)

        # NOTE: the code below is unreachable (kept from an earlier variant
        # that sub-selected the Kronecker covariance for the sampled X only).
        # K = np.kron(Ks, Kx)
        # Now, we get the sub-covariance matrix for the specified sampled X and Y
        indices = []
        for l in range(theta.n_IS):
            for i, x in enumerate(X0):
                test = [l] + list(x)
                if any([all([a == b for a, b in zip(test, xchk)])
                        for xchk in X]):
                    indices.append(l * len(X0) + i)
        K_local = K[np.ix_(indices, indices)]
        return K_local

    sim.cov = cov_new

    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (
        1E-2, lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    # NOTE! This is a reserved keyword in misoKG.  We will generate a list of
    # the same length as the information sources, and use this for scaling our IS.
    # sim.theta.rho = {"[0, 0]": 1, "[0, 1]": None, "[1, 1]": 1}
    # sim.theta.bounds['rho [0, 1]'] = (-1.0, 1.0)
    # sim.theta.bounds['rho [0, 0]'] = (1, 1)
    # sim.theta.bounds['rho [1, 1]'] = (1, 1)
    sim.theta.rho = {"[0, 0]": None, "[0, 1]": None, "[1, 1]": None}
    sim.theta.bounds['rho [0, 0]'] = (0.1, 1.0)
    sim.theta.bounds['rho [0, 1]'] = (0.1, 1.0)
    sim.theta.bounds['rho [1, 1]'] = (0.1, 1.0)

    sim.theta.set_hp_names()

    sim.primary_rho_opt = False
    # sim.update_hp_only_with_IS0 = True
    sim.update_hp_only_with_overlapped = True

    ###################################################################################################
    # Start simulation
    sim.run()
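# A quick standalone check of the PSD construction in cov_new above (the
# helper name and toy entries are illustrative): Ks = L.dot(L.T) is positive
# semi-definite for any lower-triangular L, which np.linalg.eigvalsh can
# confirm numerically.
def _psd_check_sketch():
    L = np.array([[1.0, 0.0], [0.9, 0.5]])  # toy lower-triangular rho entries
    L = L / np.linalg.norm(L)                # same normalization as cov_new
    Ks = L.dot(L.T)
    return bool((np.linalg.eigvalsh(Ks) >= -1e-12).all())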