def getSamples(N_samples, eps=True, rho=True):
    """
    Draw a latin hypercube sample and encode every draw as a flat feature
    list made of four one-hot triples followed by solvent properties.

    The first four sampled coordinates are each converted into a
    three-element one-hot list; the fifth coordinate selects an entry of the
    module-level ``solvent_names`` and the matching ``density``,
    ``dielectric`` and ``index`` values from ``solvents`` are appended.
    The first three triples are then reordered Br -> Cl -> I.

    **Parameters**

        N_samples: *int*
            Number of samples requested from ``doe_lhs.lhs``.
        eps: *bool, optional*
            When False the solvent dielectric entry is removed.
        rho: *bool, optional*
            When False the solvent density entry is removed.

    **Returns**

        samples: *list, list, int/float*
            The unique encoded samples.  Duplicates are removed through a
            set, so fewer than N_samples rows may come back and their order
            is not guaranteed.

    **Raises**

        Exception: if both eps and rho are False.
    """
    if not (rho or eps):
        raise Exception("Error - Either eps or rho should be specified.")

    # NOTE(review): assumes doe_lhs.lhs yields rows of 5 values in [0, 1]
    # -- confirm against the sampler in use.
    raw_points = doe_lhs.lhs(5, N_samples)

    def one_hot(v):
        # Flag which third of the unit interval v falls in.
        return [
            int(v > (upper - 1.0 / 3.0) and v <= upper)
            for upper in [1. / 3., 2. / 3., 1.0]
        ]

    # Upper edges of equally sized bins, one bin per solvent.
    n_solvents = len(solvents)
    cutoffs = [k * 1.0 / n_solvents for k in range(1, n_solvents + 1)]

    def pick_solvent(v):
        # Name of the first solvent whose bin edge is at or above v.
        inside = [v <= edge for edge in cutoffs]
        return solvent_names[inside.index(True)]

    encoded = []
    for point in raw_points:
        row = []
        for k in range(4):
            row += one_hot(point[k])
        props = solvents[pick_solvent(point[-1])]
        row += [props["density"], props["dielectric"], props["index"]]
        encoded.append(row)

    # The tail of every row is [density, dielectric, index]; drop on request.
    if not rho:
        for row in encoded:
            del row[-3]
    if not eps:
        for row in encoded:
            del row[-2]

    # Order the first three triples by Br -> Cl -> I (the sort is stable, so
    # triples naming the same halide keep their relative order).
    halides = ["Br", "Cl", "I"]
    for pos, row in enumerate(encoded):
        triples = [row[0:3], row[3:6], row[6:9]]
        triples.sort(key=lambda t: halides[t.index(1)])
        encoded[pos] = triples[0] + triples[1] + triples[2] + row[9:]

    # Ensure no duplicates
    unique_rows = set([tuple(row) for row in encoded])
    return [list(row) for row in unique_rows]
def create_lhs(
        N_combinations,
        sample_max=[10, 10, 1000, 1000, 1, 10, 10, 10, 100000, 5, 2, 15, 15],
        sample_min=[0, 0, 0, 0, -1, 0, 0, 0, 0, 2, 0, 0, 0],
        num_samples=1):
    """
    Generate a latin hypercube sample for the tersoff parameters.  The order
    in which parameters are stored are as follows:

        gamma, lambda3, c, d, cos(theta_0), n, beta, lambda2, B, R, D,
        lambda1, A

    Every group of 13 scaled parameters is preceded by a randomly chosen
    value of 1 or 3 for the m variable.

    **Parameters**

        N_combinations: *int*
            How many 13-parameter entries each sample holds.
        sample_max: *list, float, optional*
            The maximum for each of the 13 tersoff parameters.
        sample_min: *list, float, optional*
            The minimum for each of the 13 tersoff parameters.
        num_samples: *int, optional*
            The number of samples you wish to generate.

    **Returns**

        lhs_list: *list, list, float*
            One list per row returned by lhs.lhs.  Each list holds, for every
            entry, the m value followed by its 13 scaled parameters.

    **Raises**

        Exception: if a generated D is larger than the R of the same entry.
    """
    n_params = 13
    r_slot, d_slot = 9, 10

    # Each entry needs 13 numbers, so N_combinations * 13 dimensions are
    # sampled.  NOTE(review): assumes lhs.lhs returns values in [0, 1].
    unit_samples = lhs.lhs(N_combinations * n_params, num_samples)

    scaled_samples = []
    for unit_row in unit_samples:
        row = []
        cutoff_R = None
        for position, unit_value in enumerate(unit_row):
            slot = position % n_params

            if slot == 0:
                # This lets us put a 1 or 3 in front of gamma for the m
                # variable without changing LHS applicability
                row.append(random.choice([1, 3]))

            # Linear map from the unit interval onto [min, max] of this slot.
            high, low = sample_max[slot], sample_min[slot]
            value = unit_value * (high - low) + low

            if slot == r_slot:
                cutoff_R = value
            elif slot == d_slot and value > cutoff_R:
                raise Exception("Generated D>R, which is unrealistic.")

            row.append(value)
        scaled_samples.append(row)

    return scaled_samples
def create_lhs(N_points, N_samples, sample_bounds, params=None):
    '''
    Build a latin hypercube sample of an n dimensional space whose extent is
    described by sample_bounds.  For example, to sample the lennard jones
    parameter space five times:

        parameters = create_lhs(2, 5, [(0, 10), (0, 10)])

    **Parameters**

        N_points: *int*
            The dimensionality of our system.
        N_samples: *int*
            How many samples we want to do.
        sample_bounds: *list, tuple, float* or *list, list, int/float*
            One entry per dimension, each of which may be:

                * a tuple (low, high): the point is scaled into that range
                  and cast to float.
                * a tuple (low, high, type): as above, but cast with the
                  given type instead.
                * a list: one element is picked with random.choice.
                * anything else: used unchanged as a static value.

            E.g. [(0, 10), (3, 20), (-3, 2)] or [3, [2, 3], (-5., 2.3, float)].
        params: *list, str, optional*
            Names for the dimensions, one to one with sample_bounds.  When
            given, each sample is returned as a dictionary keyed by these
            names instead of a list.

    **Returns**

        params: *list, dict/list, float*
            One entry per sample: a list of the chosen values or, if params
            was specified, a dictionary mapping each name to its value.
    '''
    # One row per sample, one column per dimension, values between 0 and 1.
    unit_samples = lhs.lhs(N_points, N_samples)

    # Work on a private copy so the caller's bounds are left untouched, and
    # give every two-element tuple an explicit float cast.
    bounds = copy.deepcopy(sample_bounds)
    for idx, bound in enumerate(bounds):
        if isinstance(bound, tuple) and len(bound) == 2:
            bounds[idx] = (bound[0], bound[1], float)

    def realise(point, bound):
        # Turn one unit-interval point into a value for the given bound.
        if isinstance(bound, tuple):
            low, high, use_type = bound
            return use_type(point * (high - low) + low)
        if isinstance(bound, list):
            # Discrete options: pick one at random
            return random.choice(bound)
        # Static value, leave as is.
        return bound

    results = []
    for unit_row in unit_samples:
        values = [realise(point, bound) for point, bound in zip(unit_row, bounds)]
        if params is not None:
            values = dict(zip(params, values))
        results.append(values)

    return results
def MLE(x, y, sp, n_start=None, method=None):
    '''
    Given sampled data, use the maximum likelihood-estimator to find
    hyperparameters.  The negated likelihood is minimized from n_start
    latin-hypercube starting points and the best result is kept.

    **Parameters**

        x, y, sp:
            Sampled data, forwarded unchanged to likelihood().  y is also
            used (max and variance) to set the default search bounds, and
            len(sp[0]) decides whether the two trailing length-scale bounds
            are replaced by a single fixed (1.0, 1.0) bound.
        n_start: *int, optional*
            Number of starting points.  Defaults to
            NUMBER_OF_RANDOM_STARTING_PARAMETERS.
        method: *str, optional*
            None for the default hyperparameter set, or "simple" / "hutter"
            to use the bounds of those models.

    **Returns**

        hyperparams: *list, float/int*
            The optimized hyperparameters with the lowest negated
            likelihood.

    **Raises**

        ValueError: from np.nanargmin if every start produced NaN.
    '''
    # ASSUME: HP = [mu_alpha, sig_alpha, sig_beta, mu_zeta, sig_zeta, sig_m, l1, l2]
    if n_start is None:
        # How many samples we do for MLE
        n_start = NUMBER_OF_RANDOM_STARTING_PARAMETERS

    bounds = [(1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)),
              (1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)),
              (1E-3, 1), (1E-3, 1)]
    if len(sp[0]) == 1:
        bounds = bounds[:-2] + [(1.0, 1.0)]

    if method == 'simple':
        # Keep the upper bound above the 1E-3 lower bound even when max(y)
        # is tiny or negative; this matches MLE_parallel.
        prior_upper = max(2E-3, max(y))
        bounds = [
            (1E-3, 1),  # h1_Br
            (1E-3, 1),  # h1_Cl
            (1E-3, 1),  # h1_I
            (1E-3, 1),  # h2_Br
            (1E-3, 1),  # h2_Cl
            (1E-3, 1),  # h2_I
            (1E-3, 1),  # h3_Br
            (1E-3, 1),  # h3_Cl
            (1E-3, 1),  # h3_I
            (1E-3, 1),  # c_Cs
            (1E-3, 1),  # c_FA
            (1E-3, 1),  # c_MA
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, prior_upper)  # constant prior
        ]
    elif method == "hutter":
        # Same guard as above, again matching MLE_parallel.
        prior_upper = max(2E-3, max(y))
        bounds = [
            (1E-3, 1),  # h1
            (1E-3, 1),  # h2
            (1E-3, 1),  # h3
            (1E-3, 1),  # c
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, prior_upper)  # constant prior
        ]

    # Spread the starting points over the bounded space.
    sampled_values = doe_lhs.lhs(len(bounds), samples=n_start)
    init_values = [
        [s * (b[1] - b[0]) + b[0] for s, b in zip(sampled_values[j], bounds)]
        for j in range(n_start)
    ]

    mle_list = np.zeros([n_start, len(bounds)])
    lkh_list = np.zeros(n_start)

    # MLE = Maximum Likelihood Estimation.  But we use a minimizer!  So
    # invert the likelihood instead.
    def f(*args):
        return -1.0 * likelihood(x, y, sp, method, *args)

    # For each starting point, minimize and store the result
    for i, start in enumerate(init_values):
        results = op.minimize(f, start, bounds=bounds)
        mle_list[i, :] = results['x']  # Store the optimized parameters
        lkh_list[i] = results.fun  # Store the resulting likelihood

    # Now, select parameters for the max likelihood of these.  Note, min
    # because we inverted the likelihood so we can use a minimizer.
    index = np.nanargmin(lkh_list)
    best_theta = mle_list[index, :]

    return best_theta
def MLE_parallel(x, y, sp, n_start=None, method=None):
    '''
    Given sampled data, use the maximum likelihood-estimator to find
    hyperparameters.  Each latin-hypercube starting point is optimized in a
    separate worker of a multiprocessing pool and the best result is kept.

    **Parameters**

        x, y, sp:
            Sampled data, forwarded unchanged to the worker function.  y is
            also used (max and variance) to set the default search bounds,
            and len(sp[0]) decides whether the two trailing length-scale
            bounds are replaced by a single fixed (1.0, 1.0) bound.
        n_start: *int, optional*
            Number of starting points.  Defaults to
            NUMBER_OF_RANDOM_STARTING_PARAMETERS.
        method: *str, optional*
            None for the default hyperparameter set, or "simple" / "hutter"
            to use the bounds and worker of those models.

    **Returns**

        hyperparams: *list, float/int*
            The optimized hyperparameters with the lowest negated
            likelihood.

    **Raises**

        ValueError: from np.nanargmin if every start produced NaN.
    '''
    # ASSUME: HP = [mu_alpha, sig_alpha, sig_beta, mu_zeta, sig_zeta, sig_m, l1, l2]
    import numpy as np

    if n_start is None:
        # How many samples we do for MLE
        n_start = NUMBER_OF_RANDOM_STARTING_PARAMETERS

    bounds = [(1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)),
              (1E-3, max(y)), (1E-3, np.var(y)), (1E-3, np.var(y)),
              (1E-3, 1), (1E-3, 1)]
    if len(sp[0]) == 1:
        bounds = bounds[:-2] + [(1.0, 1.0)]

    if method == "simple":
        bounds = [
            (1E-3, 1),  # h1_Br
            (1E-3, 1),  # h1_Cl
            (1E-3, 1),  # h1_I
            (1E-3, 1),  # h2_Br
            (1E-3, 1),  # h2_Cl
            (1E-3, 1),  # h2_I
            (1E-3, 1),  # h3_Br
            (1E-3, 1),  # h3_Cl
            (1E-3, 1),  # h3_I
            (1E-3, 1),  # c_Cs
            (1E-3, 1),  # c_FA
            (1E-3, 1),  # c_MA
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, max(2E-3, max(y)))  # constant prior
        ]
    elif method == "hutter":
        bounds = [
            (1E-3, 1),  # h1
            (1E-3, 1),  # h2
            (1E-3, 1),  # h3
            (1E-3, 1),  # c
            (1E-3, 1),  # Dielectric
            (1E-3, 1),  # sigma
            (1E-3, max(2E-3, max(y)))  # constant prior
        ]

    # One argument tuple per starting point: the data, the scaled starting
    # hyperparameters and the bounds.
    sampled_values = doe_lhs.lhs(len(bounds), samples=n_start)
    init_values = [
        (x, y, sp,
         [s * (b[1] - b[0]) + b[0] for s, b in zip(sampled_values[j], bounds)],
         bounds)
        for j in range(n_start)
    ]

    mle_list = np.zeros([n_start, len(bounds)])
    lkh_list = np.zeros(n_start)

    # Pick the worker that matches the requested model.
    if method == "simple":
        worker = opt_hps_parallel_simple
    elif method == "hutter":
        worker = opt_hps_parallel_hutter
    else:
        worker = opt_hps_parallel

    pool = mp.Pool(processes=PROCESSES_ALLOWED)
    try:
        all_res = pool.map(worker, init_values)
    finally:
        # Always tear the workers down, even if one of them raised, so no
        # processes are left behind.
        pool.terminate()

    # Each result is (optimized parameters, negated likelihood).
    for i, res in enumerate(all_res):
        mle_list[i, :], lkh_list[i] = res

    # Now, select parameters for the max likelihood of these.  Note, min
    # because we inverted the likelihood so we can use a minimizer.
    index = np.nanargmin(lkh_list)
    best_theta = mle_list[index, :]

    return best_theta
def create_lhs(
        N_combinations,
        sample_max=[10, 10, 1000, 1000, 1, 10, 10, 10, 100000, 5, 2, 15, 15],
        sample_min=[0, 0, 0, 0, -1, 0, 0, 0, 0, 2, 0, 0, 0],
        num_samples=1):
    """
    Produce latin hypercube samples of the tersoff parameters.  Parameters
    are stored in the following order:

        gamma, lambda3, c, d, cos(theta_0), n, beta, lambda2, B, R, D,
        lambda1, A

    In the output, each block of 13 scaled parameters is preceded by an m
    value drawn at random from 1 and 3.

    **Parameters**

        N_combinations: *int*
            Number of 13-parameter entries contained in every sample.
        sample_max: *list, float, optional*
            The maximum for each of the 13 tersoff parameters.
        sample_min: *list, float, optional*
            The minimum for each of the 13 tersoff parameters.
        num_samples: *int, optional*
            The number of samples you wish to generate.

    **Returns**

        lhs_list: *list, list, float*
            One list per row returned by lhs.lhs, holding for each entry the
            m value followed by the 13 scaled parameters.

    **Raises**

        Exception: if, within an entry, the generated D exceeds R.
    """
    PARAMS_PER_ENTRY = 13
    GAMMA, R_POS, D_POS = 0, 9, 10

    def rescale(unit_value, k):
        # Map a sampled value linearly onto [sample_min[k], sample_max[k]].
        # NOTE(review): assumes lhs.lhs returns values in [0, 1] -- confirm.
        return unit_value * (sample_max[k] - sample_min[k]) + sample_min[k]

    lhs_list = []
    # 13 numbers are needed for every entry, hence the sampled dimension.
    for sample in lhs.lhs(N_combinations * PARAMS_PER_ENTRY, num_samples):
        sample_list = []
        R = None
        for n, unit_value in enumerate(sample):
            k = n % PARAMS_PER_ENTRY

            if k == GAMMA:
                # Start of an entry: put the m variable (1 or 3) first,
                # which leaves the LHS applicability unchanged.
                sample_list.append(random.choice([1, 3]))

            scaled = rescale(unit_value, k)

            if k == R_POS:
                # Remember R so the D that follows can be checked against it.
                R = scaled
            if k == D_POS and scaled > R:
                raise Exception("Generated D>R, which is unrealistic.")

            sample_list.append(scaled)
        lhs_list.append(sample_list)

    return lhs_list