def build_map(self, GP):
    """
    Fill the mean and variance splines, either from the GP or from saved grids.

    ``self.update_bounds(GP)`` is called first. When ``self.load_grid`` is
    falsy the grid values come from ``self.GenGrid(GP)``; otherwise they are
    read from
    ``<load_grid>/mgp_grids/<bodies>_<species_code>_mean.npy`` and
    ``..._var.npy``.

    Args:
        GP: the Gaussian process to map. ``GP.hyps_mask`` is deep-copied
            onto ``self.hyps_mask`` once the splines are filled.

    Raises:
        FileNotFoundError: ``self.load_grid`` is set but the directory it
            names has no ``mgp_grids`` entry.
    """
    self.update_bounds(GP)

    if not self.load_grid:
        y_mean, y_var = self.GenGrid(GP)
    else:
        if "mgp_grids" not in os.listdir(self.load_grid):
            raise FileNotFoundError(
                "Please set 'load_grid' as the location of mgp_grids folder"
            )

        grid_path = f"{self.load_grid}/mgp_grids/{self.bodies}_{self.species_code}"
        y_mean = np.load(f"{grid_path}_mean.npy")
        # NOTE(review): allow_pickle=True will unpickle whatever the file
        # contains; only point load_grid at trusted directories.
        y_var = np.load(f"{grid_path}_var.npy", allow_pickle=True)

    self.mean.set_values(y_mean)

    if self.var_map == "pca" and self.svd_rank == "auto":
        # "auto" means full rank. Treat y_var as a
        # (number of grid points) x (last axis) matrix, so the rank is the
        # smaller of those two sizes; np.min(y_var.shape) would under-rank
        # grids with more than one grid axis.
        # NOTE(review): assumes PCASplines flattens the leading axes the same
        # way - confirm against PCASplines.set_values.
        n_grid_points = np.prod(y_var.shape[:-1])
        full_rank = np.min((n_grid_points, y_var.shape[-1]))
        self.var = PCASplines(
            self.bounds[0],
            self.bounds[1],
            orders=self.grid_num,
            svd_rank=full_rank,
        )

    if self.var_map is not None:
        self.var.set_values(y_var)

    # keep an independent copy so later edits to the GP do not leak in
    self.hyps_mask = deepcopy(GP.hyps_mask)
def build_map(self, GP):
    """
    Generate the grids from ``GP`` and load them into the spline containers.

    For ``var_map == "pca"`` the variance container is (re)built here with a
    rank of either the full rank (``svd_rank == "auto"``) or the validated
    integer ``svd_rank``. ``GP.hyps_mask`` is deep-copied onto the map at the
    end.

    Raises:
        AssertionError: ``svd_rank`` is neither ``"auto"`` nor an int, or it
            exceeds the full rank of the variance grid.
    """
    self.update_bounds(GP)

    grid_mean, grid_var = self.GenGrid(GP)
    self.mean.set_values(grid_mean)

    if self.var_map == "pca":
        # rank bound: (product of all leading axes) vs. the last axis
        n_grid_points = np.prod(grid_var.shape[:-1])
        full_rank = np.min((n_grid_points, grid_var.shape[-1]))

        if self.svd_rank == "auto":
            rank = full_rank
        else:
            assert isinstance(self.svd_rank, int), "Please set svd_rank to int or 'auto'"
            assert self.svd_rank <= full_rank, f"svd_rank={self.svd_rank} exceeds full_rank={full_rank}"
            rank = self.svd_rank

        self.var = PCASplines(
            self.bounds[0],
            self.bounds[1],
            orders=self.grid_num,
            svd_rank=rank,
        )

    if self.var_map is not None:
        self.var.set_values(grid_var)

    self.hyps_mask = deepcopy(GP.hyps_mask)
def build_map_container(self):
    """
    Create the empty spline containers for the mean and the variance.

    The mean is always a ``CubicSpline``. The variance container depends on
    ``self.var_map``:

    * ``"simple"``: a ``CubicSpline``;
    * ``"pca"`` with an integer ``svd_rank``: a ``PCASplines``;
    * ``"pca"`` with ``svd_rank == "auto"``: nothing is built, only a
      warning is emitted.
    """
    lower, upper = self.bounds[0], self.bounds[1]
    self.mean = CubicSpline(lower, upper, orders=self.grid_num)

    variance_kind = self.var_map
    if variance_kind == "simple":
        self.var = CubicSpline(lower, upper, orders=self.grid_num)
    elif variance_kind == "pca":
        if self.svd_rank == "auto":
            warnings.warn(
                "The containers for variance are not built because svd_rank='auto'"
            )
        elif isinstance(self.svd_rank, int):
            self.var = PCASplines(
                lower,
                upper,
                orders=self.grid_num,
                svd_rank=self.svd_rank,
            )
def build_map_container(self):
    """
    Create the empty spline containers for the mean and the variance.

    If any entry of the upper bounds is <= 0 the containers are built on
    placeholder bounds (zeros for the lower, ones for the upper) instead of
    ``self.bounds``. The variance container follows ``self.var_map``:
    ``"simple"`` gives a ``CubicSpline``, ``"pca"`` with an integer
    ``svd_rank`` gives a ``PCASplines``, and ``"pca"`` with
    ``svd_rank == "auto"`` only emits a warning.
    """
    upper_not_positive = np.any(np.array(self.bounds[1]) <= 0.0)
    if upper_not_positive:
        lower = np.zeros_like(self.bounds[0])
        upper = np.ones_like(self.bounds[1])
    else:
        lower = self.bounds[0]
        upper = self.bounds[1]

    self.mean = CubicSpline(lower, upper, orders=self.grid_num)

    variance_kind = self.var_map
    if variance_kind == "simple":
        self.var = CubicSpline(lower, upper, orders=self.grid_num)
    elif variance_kind == "pca":
        if self.svd_rank == "auto":
            warnings.warn(
                "The containers for variance are not built because svd_rank='auto'"
            )
        elif isinstance(self.svd_rank, int):
            self.var = PCASplines(
                lower,
                upper,
                orders=self.grid_num,
                svd_rank=self.svd_rank,
            )
def build_map_container(self):
    '''
    Create the empty mean spline and, unless ``mean_only`` is set, the
    PCA variance spline, both on a single grid axis of ``grid_num`` points.
    '''
    lower, upper = self.l_bounds, self.u_bounds

    self.mean = CubicSpline(lower, upper, orders=[self.grid_num])
    if self.mean_only:
        return

    self.var = PCASplines(lower, upper, orders=[self.grid_num],
                          svd_rank=self.svd_rank)
def build_map_container(self):
    '''
    Create the empty spline interpolation objects: a ``CubicSpline`` for the
    mean and, unless ``mean_only`` is set, a ``PCASplines`` for the variance.
    '''
    lower, upper = self.l_bounds, self.u_bounds
    orders = self.grid_num

    self.mean = CubicSpline(lower, upper, orders=orders)
    if self.mean_only:
        return

    self.var = PCASplines(lower, upper, orders=orders,
                          svd_rank=self.svd_rank)
def build_map_container(self):
    '''
    Create the empty containers: a 1-d spline for the mean and, unless
    ``mean_only`` is set, a PCA spline for the variance.
    '''
    lower, upper = self.bounds[0], self.bounds[1]

    self.mean = CubicSpline(lower, upper, orders=[self.grid_num])
    if self.mean_only:
        return

    self.var = PCASplines(lower, upper, orders=[self.grid_num],
                          svd_rank=self.svd_rank)
def build_map_container(self):
    '''
    Create the empty containers: a spline for the mean and, unless
    ``mean_only`` is set, a PCA spline for the low rank approximation of
    L^{-1}k*. Both use ``grid_num`` as the per-axis orders.
    '''
    lower, upper = self.bounds[0], self.bounds[1]
    orders = self.grid_num

    self.mean = CubicSpline(lower, upper, orders=orders)
    if self.mean_only:
        return

    self.var = PCASplines(lower, upper, orders=orders,
                          svd_rank=self.svd_rank)
def build_map_container(self):
    '''
    Create the empty containers: a 3-d spline for the mean and, unless
    ``mean_only`` is set, a PCA spline for the low rank approximation of
    L^{-1}k*.

    The orders are ``[grid_num[0], grid_num[0], grid_num[2]]``; i.e.
    ``grid_num[1]`` is not read here.
    '''
    n_first, n_third = self.grid_num[0], self.grid_num[2]
    lower, upper = self.l_bounds, self.u_bounds

    self.mean = CubicSpline(lower, upper,
                            orders=[n_first, n_first, n_third])
    if self.mean_only:
        return

    self.var = PCASplines(lower, upper,
                          orders=[n_first, n_first, n_third],
                          svd_rank=self.svd_rank)
def build_map(self, GP):
    """
    Generate the grids from ``GP`` and load them into the spline containers.

    ``self.update_bounds(GP)`` is called first, then ``self.GenGrid(GP)``
    supplies the mean and variance grids. With ``var_map == "pca"`` and
    ``svd_rank == "auto"`` the variance container is built here at full
    rank, because the rank is only known once the grid exists.

    Args:
        GP: the Gaussian process to map. ``GP.hyps_mask`` is deep-copied
            onto ``self.hyps_mask`` once the splines are filled.
    """
    self.update_bounds(GP)

    y_mean, y_var = self.GenGrid(GP)

    self.mean.set_values(y_mean)

    if self.var_map == "pca" and self.svd_rank == "auto":
        # "auto" means full rank. Treat y_var as a
        # (number of grid points) x (last axis) matrix, so the rank is the
        # smaller of those two sizes; np.min(y_var.shape) would under-rank
        # grids with more than one grid axis.
        # NOTE(review): assumes PCASplines flattens the leading axes the same
        # way - confirm against PCASplines.set_values.
        n_grid_points = np.prod(y_var.shape[:-1])
        full_rank = np.min((n_grid_points, y_var.shape[-1]))
        self.var = PCASplines(
            self.bounds[0],
            self.bounds[1],
            orders=self.grid_num,
            svd_rank=full_rank,
        )

    if self.var_map is not None:
        self.var.set_values(y_var)

    # keep an independent copy so later edits to the GP do not leak in
    self.hyps_mask = deepcopy(GP.hyps_mask)
def build_map(self, y_mean, y_var):
    '''
    Build the spline for the mean from ``y_mean`` and, unless ``mean_only``
    is set, the PCA spline for the variance from ``y_var``.

    Both interpolators share the same bounds and a single grid axis of
    ``grid_num`` points; fresh arrays are created for each of them.
    '''
    def grid_spec():
        # new arrays on every call so the two interpolators never share them
        return dict(u_bounds=np.array(self.u_bound),
                    l_bounds=np.array(self.l_bound),
                    orders=np.array([self.grid_num]))

    self.mean = SplinesInterpolation(y_mean, **grid_spec())
    if self.mean_only:
        return

    self.var = PCASplines(y_var, svd_rank=self.svd_rank, **grid_spec())
def build_map(self, y_mean, y_var, svd_rank, load_svd):
    '''
    Build the 3-d spline for the mean from ``y_mean`` and the PCA spline for
    the low rank approximation of L^{-1}k* from ``y_var``.

    The orders are ``[grid_num[0], grid_num[0], grid_num[2]]``; ``svd_rank``
    and ``load_svd`` are forwarded to ``PCASplines`` unchanged.
    '''
    n_first, n_third = self.grid_num[0], self.grid_num[2]

    def orders():
        # a fresh array per interpolator
        return np.array([n_first, n_first, n_third])

    self.mean = SplinesInterpolation(y_mean,
                                     u_bounds=self.u_bound,
                                     l_bounds=self.l_bound,
                                     orders=orders())
    self.var = PCASplines(y_var,
                          u_bounds=self.u_bound,
                          l_bounds=self.l_bound,
                          orders=orders(),
                          svd_rank=svd_rank,
                          load_svd=load_svd)
def build_map(self, y_mean, y_var):
    '''
    Build the 1-d spline for the mean and the interpolator for the variance.

    The variance interpolator depends on ``self.bodies``:

    * ``'2'``: a 2-d ``SplinesInterpolation`` whose two axes both use the
      same bounds and ``grid_num``;
    * ``'2+3'``: a ``PCASplines`` on one axis with ``svd_rank`` and
      ``load_svd=None``.

    For any other value ``self.var`` is left untouched.
    '''
    upper, lower, n_grid = self.u_bound, self.l_bound, self.grid_num

    self.mean = SplinesInterpolation(y_mean,
                                     u_bounds=np.array(upper),
                                     l_bounds=np.array(lower),
                                     orders=np.array([n_grid]))

    body_kind = self.bodies
    if body_kind == '2':
        self.var = SplinesInterpolation(
            y_var,
            u_bounds=np.array([upper, upper]),
            l_bounds=np.array([lower, lower]),
            orders=np.array([n_grid, n_grid]))
    elif body_kind == '2+3':
        self.var = PCASplines(y_var,
                              u_bounds=np.array(upper),
                              l_bounds=np.array(lower),
                              orders=np.array([n_grid]),
                              svd_rank=self.svd_rank,
                              load_svd=None)
class Map3body:
    '''
    3-body mapped GP for one species triplet: the GP mean and (unless
    ``mean_only``) the vectors L^{-1}k* are evaluated on a regular grid and
    stored in spline containers.
    '''

    def __init__(self, grid_num, bounds, bond_struc: "Structure",
                 svd_rank: int = 0, mean_only: bool = False,
                 load_grid: str = '', update: bool = True, n_cpus=None,
                 n_sample=100):
        '''
        Build 3-body MGP

        grid_num: number of grid points along each of the three axes
        bounds: ``[lower, upper]`` bounds, each indexable per axis
        bond_struc: Mock Structure object which contains 3 atoms to get map
            from
        svd_rank: rank passed to the PCA variance spline
        mean_only: skip everything related to the variance
        load_grid: prefix of previously saved grids; grids are generated
            from the GP when this is falsy (the default ``''``)
        update: incremental update of cached kernel vectors; not implemented,
            so ``GenGrid`` raises ``NotImplementedError`` while this is True
        n_cpus: number of worker processes (``None`` = all cores)
        n_sample: batch size handed to ``partition_c``
        '''
        self.grid_num = grid_num
        self.bounds = bounds
        self.bond_struc = bond_struc
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.load_grid = load_grid
        self.update = update
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        spc = bond_struc.coded_species
        self.species_code = '_'.join(Z_to_element(spc[i]) for i in range(3))
        self.kv3name = f'kv3_{self.species_code}'

        self.build_map_container()

    def GenGrid(self, GP):
        '''
        To use GP to predict value on each grid point, we need to generate
        the kernel vector kv whose length is the same as the training set
        size.

        1. We divide the training set into several batches, corresponding to
           different segments of kv
        2. Distribute each batch to a processor, i.e. each processor
           calculate the kv segment of one batch for all grids
        3. Collect kv segments and form a complete kv vector for each grid,
           and calculate the grid value by multiplying the complete kv
           vector with GP.alpha

        The grids are also saved to ``grid3_mean_<species_code>.npy`` and
        ``grid3_var_<species_code>.npy`` in the current directory.

        Returns:
            ``(grid_means, grid_vars)``; ``grid_vars`` is ``None`` when
            ``mean_only`` is set.

        Raises:
            NotImplementedError: ``self.update`` is True.
        '''
        processes = mp.cpu_count() if self.n_cpus is None else self.n_cpus

        # ------ get 3body kernel info ------
        kernel_info = get_3bkernel(GP)

        # ------ construct grids ------
        n1, n2, n12 = self.grid_num
        bonds1 = np.linspace(self.bounds[0][0], self.bounds[1][0], n1)
        # NOTE(review): the second bond axis reuses the bounds of axis 0;
        # confirm this is intended rather than axis 1.
        bonds2 = np.linspace(self.bounds[0][0], self.bounds[1][0], n2)
        bonds12 = np.linspace(self.bounds[0][2], self.bounds[1][2], n12)
        grid_means = np.zeros([n1, n2, n12])
        if self.mean_only:
            grid_vars = None
        else:
            grid_vars = np.zeros([n1, n2, n12, len(GP.alpha)])

        env12 = AtomicEnvironment(self.bond_struc, 0, GP.cutoffs)
        size = len(GP.training_data)

        # Checked once for both the serial and the parallel path, before any
        # worker pool is created.
        if self.update:
            raise NotImplementedError("the update function is "
                                      "not yet implemented")

        if processes == 1:
            k12_v_all = self._GenGrid_inner(GP.name, 0, size, bonds1, bonds2,
                                            bonds12, env12, kernel_info)
        else:
            with mp.Pool(processes=processes) as pool:
                block_id, nbatch = partition_c(self.n_sample, size,
                                               processes)
                k12_v_all = np.zeros([n1, n2, n12, size * 3])

                pending = []  # async results not collected yet
                base = 0      # batch index of pending[0]
                for ibatch in range(nbatch):
                    s, e = block_id[ibatch]
                    pending.append(
                        pool.apply_async(self._GenGrid_inner,
                                         args=(GP.name, s, e, bonds1, bonds2,
                                               bonds12, env12, kernel_info)))
                    # collect early once too many results are outstanding,
                    # to limit how many are held at the same time
                    if len(pending) > processes * 2:
                        for offset, job in enumerate(pending):
                            s, e = block_id[base + offset]
                            k12_v_all[:, :, :, s * 3:e * 3] = job.get()
                        pending = []
                        base = ibatch + 1

                for offset, job in enumerate(pending):
                    s, e = block_id[base + offset]
                    k12_v_all[:, :, :, s * 3:e * 3] = job.get()

                pool.close()
                pool.join()

        for b1, b2, b12 in np.ndindex(n1, n2, n12):
            k12_v = k12_v_all[b1, b2, b12, :]
            grid_means[b1, b2, b12] = np.matmul(k12_v, GP.alpha)
            if not self.mean_only:
                grid_vars[b1, b2, b12, :] = solve_triangular(GP.l_mat, k12_v,
                                                             lower=True)

        # ------ save mean and var to file -------
        np.save('grid3_mean_' + self.species_code, grid_means)
        np.save('grid3_var_' + self.species_code, grid_vars)

        return grid_means, grid_vars

    def _GenGrid_inner(self, name, s, e, bonds1, bonds2, bonds12, env12,
                       kernel_info):
        '''
        Calculate kv segments of the given batch of training data for all
        grids

        name, s, e: forwarded to ``en_kern_vec``; the segment covers
            ``(e - s) * 3`` kernel-vector entries
        bonds1, bonds2, bonds12: grid values along the three axes
        env12: environment whose ``bond_array_3`` / ``cross_bond_dists`` are
            overwritten for every grid point
        kernel_info: ``(kernel, en_force_kernel, cutoffs, hyps, hyps_mask)``

        Returns an array of shape
        ``(len(bonds1), len(bonds2), len(bonds12), (e - s) * 3)``.

        Raises:
            NotImplementedError: ``self.update`` is True.
        '''
        # fail before doing any grid work; the cached-update path does not
        # exist yet
        if self.update:
            raise NotImplementedError("the update function is not yet "
                                      "implemented")

        kernel, en_force_kernel, cutoffs, hyps, hyps_mask = kernel_info

        size = (e - s) * 3
        k12_v = np.zeros([len(bonds1), len(bonds2), len(bonds12), size])
        for b12, r12 in enumerate(bonds12):
            for b1, r1 in enumerate(bonds1):
                for b2, r2 in enumerate(bonds2):
                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
                                                   [r2, 0, 0, 0]])
                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])
                    k12_v[b1, b2, b12, :] = en_kern_vec(name, s, e, env12,
                                                        en_force_kernel,
                                                        hyps, cutoffs,
                                                        hyps_mask)

        return k12_v

    def build_map_container(self):
        '''
        build 3-d spline function for mean,
        3-d for the low rank approximation of L^{-1}k*
        '''
        # create spline interpolation class object
        self.mean = CubicSpline(self.bounds[0], self.bounds[1],
                                orders=self.grid_num)

        if not self.mean_only:
            self.var = PCASplines(self.bounds[0], self.bounds[1],
                                  orders=self.grid_num,
                                  svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        Fill the spline containers, generating the grids from ``GP`` or
        loading them from disk.

        Grids are generated when ``self.load_grid`` is falsy (``None`` or
        the default ``''``). Otherwise ``load_grid`` is used as a prefix for
        ``grid3_mean_<species_code>.npy`` / ``grid3_var_<species_code>.npy``.
        The variance grid is only read when it is going to be used.
        '''
        y_var = None
        if not self.load_grid:
            y_mean, y_var = self.GenGrid(GP)
        else:
            y_mean = np.load(self.load_grid + 'grid3_mean_' +
                             self.species_code + '.npy')
            if not self.mean_only:
                y_var = np.load(self.load_grid + 'grid3_var_' +
                                self.species_code + '.npy')

        self.mean.set_values(y_mean)
        if not self.mean_only:
            self.var.set_values(y_var)

    def write(self, f, spc):
        '''
        Write the mean-spline coefficients for this triplet to ``f``.

        A header line (three element names, lower bounds, upper bounds and
        the three orders) is followed by the coefficients, five per line.

        f: object with a ``write`` method
        spc: the three coded species of the triplet
        '''
        a = self.bounds[0]
        b = self.bounds[1]
        order = self.grid_num

        coefs_3 = self.mean.__coeffs__

        elem1 = Z_to_element(spc[0])
        elem2 = Z_to_element(spc[1])
        elem3 = Z_to_element(spc[2])

        header_3 = '{elem1} {elem2} {elem3} {a1} {a2} {a3} {b1}'\
                   ' {b2} {b3:.10e} {order1} {order2} {order3}\n'\
            .format(elem1=elem1, elem2=elem2, elem3=elem3,
                    a1=a[0], a2=a[1], a3=a[2],
                    b1=b[0], b2=b[1], b3=b[2],
                    order1=order[0], order2=order[1], order3=order[2])
        f.write(header_3)

        n = 0
        for i in range(coefs_3.shape[0]):
            for j in range(coefs_3.shape[1]):
                for k in range(coefs_3.shape[2]):
                    f.write('{:.10e} '.format(coefs_3[i, j, k]))
                    # line break after every fifth coefficient
                    if n % 5 == 4:
                        f.write('\n')
                    n += 1

        f.write('\n')
class Map2body:
    '''
    2-body mapped GP for one species pair: the GP mean and (unless
    ``mean_only``) the vectors L^{-1}k* are evaluated on a 1-d grid of bond
    lengths and stored in spline containers.
    '''

    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='2',
                 svd_rank=0, mean_only=False, n_cpus=1, n_sample=100):
        '''
        Build 2-body MGP

        grid_num: number of grid points along the bond-length axis
        bounds: ``(lower, upper)`` bounds, unpacked into ``l_bounds`` and
            ``u_bounds``
        cutoffs: cutoffs used to build the atomic environment
        bond_struc: structure the atomic environment is built from; its
            ``coded_species`` is stored as ``species``
        svd_rank: rank passed to the PCA variance spline
        mean_only: skip everything related to the variance
        n_cpus: number of worker processes (``None`` = all cores)
        n_sample: batch size handed to ``partition_c``
        '''
        self.grid_num = grid_num
        self.l_bounds, self.u_bounds = bounds
        self.cutoffs = cutoffs
        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        self.build_map_container()

    def GenGrid(self, GP, processes=1):
        '''
        generate grid data of mean prediction and L^{-1}k* for each bond
        length, implemented in a parallelized style

        GP: the Gaussian process to evaluate
        processes: kept for backward compatibility only; the value is
            ignored and the worker count always comes from ``self.n_cpus``

        Returns ``(bond_means, bond_vars)``; ``bond_vars`` is ``None`` when
        ``mean_only`` is set.
        '''
        kernel_info = get_2bkernel(GP)

        processes = mp.cpu_count() if self.n_cpus is None else self.n_cpus

        # ------ construct grids ------
        nop = self.grid_num
        bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop)
        bond_means = np.zeros([nop])
        if self.mean_only:
            bond_vars = None
        else:
            bond_vars = np.zeros([nop, len(GP.alpha)])
        env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

        if processes == 1:
            k12_v_all = self._GenGrid_inner(GP.name, 0,
                                            len(GP.training_data),
                                            bond_lengths, env12, kernel_info)
        else:
            with mp.Pool(processes=processes) as pool:
                size = len(GP.training_data)
                block_id, nbatch = partition_c(self.n_sample, size,
                                               processes)
                k12_v_all = np.zeros([len(bond_lengths), size * 3])

                pending = []  # async results not collected yet
                base = 0      # batch index of pending[0]
                for ibatch in range(nbatch):
                    s, e = block_id[ibatch]
                    pending.append(
                        pool.apply_async(self._GenGrid_inner,
                                         args=(GP.name, s, e, bond_lengths,
                                               env12, kernel_info)))
                    # when there are too many outstanding results, collect
                    # some of them to reduce memory footprint
                    if len(pending) > processes * 2:
                        for offset, job in enumerate(pending):
                            s, e = block_id[base + offset]
                            k12_v_all[:, s * 3:e * 3] = job.get()
                        pending = []
                        base = ibatch + 1

                for offset, job in enumerate(pending):
                    s, e = block_id[base + offset]
                    k12_v_all[:, s * 3:e * 3] = job.get()

                pool.close()
                pool.join()

        for b in range(len(bond_lengths)):
            k12_v = k12_v_all[b, :]
            bond_means[b] = np.matmul(k12_v, GP.alpha)
            if not self.mean_only:
                bond_vars[b, :] = solve_triangular(GP.l_mat, k12_v,
                                                   lower=True)

        return bond_means, bond_vars

    def _GenGrid_inner(self, name, s, e, bond_lengths, env12, kernel_info):
        '''
        generate the kernel-vector segment of one training batch for every
        bond length; used to parallelize grid generation

        Returns an array of shape ``(len(bond_lengths), (e - s) * 3)``.
        ``env12.bond_array_2`` is overwritten for every grid point.
        '''
        kernel, efk, cutoffs, hyps, hyps_mask = kernel_info
        size = e - s
        k12_v = np.zeros([len(bond_lengths), size * 3])
        for b, r in enumerate(bond_lengths):
            env12.bond_array_2 = np.array([[r, 1, 0, 0]])
            k12_v[b, :] = get_kernel_vector_unit(name, s, e, env12, 1,
                                                 kernel, hyps, cutoffs,
                                                 hyps_mask)
        return k12_v

    def build_map_container(self):
        '''
        create the empty 1-d spline for the mean and, unless ``mean_only``
        is set, the PCA spline for the variance
        '''
        self.mean = CubicSpline(self.l_bounds, self.u_bounds,
                                orders=[self.grid_num])

        if not self.mean_only:
            self.var = PCASplines(self.l_bounds, self.u_bounds,
                                  orders=[self.grid_num],
                                  svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        build 1-d spline function for mean, 2-d for var

        Raises AssertionError when ``GP.multihyps`` is not ``False``.
        '''
        assert (GP.multihyps is False), "multihyps is not supported in mgp"

        y_mean, y_var = self.GenGrid(GP)
        self.mean.set_values(y_mean)
        if not self.mean_only:
            self.var.set_values(y_var)

    def write(self, f, spc):
        '''
        Write LAMMPS coefficient file

        A header line (two element names, lower bound, upper bound, order)
        is followed by the mean-spline coefficients, five per line.
        '''
        a = self.l_bounds[0]
        b = self.u_bounds[0]
        order = self.grid_num

        coefs_2 = self.mean.__coeffs__

        elem1 = Z_to_element(spc[0])
        elem2 = Z_to_element(spc[1])
        header_2 = '{elem1} {elem2} {a} {b} {order}\n'\
            .format(elem1=elem1, elem2=elem2, a=a, b=b, order=order)
        f.write(header_2)

        last = len(coefs_2) - 1
        for c, coef in enumerate(coefs_2):
            f.write('{:.10e} '.format(coef))
            # break after every fifth value, except right before the final
            # newline written below
            if c % 5 == 4 and c != last:
                f.write('\n')

        f.write('\n')
class Map2body:
    '''
    2-body mapped GP for one species pair, built on a 1-d grid of bond
    lengths.
    '''

    def __init__(self, grid_num: int, bounds, bond_struc: Structure,
                 svd_rank=0, mean_only: bool = False, n_cpus: int = None,
                 n_sample: int = 100):
        '''
        Build 2-body MGP

        bond_struc: Mock structure used to sample 2-body forces on 2 atoms
        '''
        self.grid_num = grid_num
        self.bounds = bounds
        self.bond_struc = bond_struc
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        coded = bond_struc.coded_species
        self.species_code = Z_to_element(coded[0]) + '_' + \
            Z_to_element(coded[1])

        self.build_map_container()

    def GenGrid(self, GP):
        '''
        To use GP to predict value on each grid point, we need to generate
        the kernel vector kv whose length is the same as the training set
        size.

        1. We divide the training set into several batches, corresponding to
           different segments of kv
        2. Distribute each batch to a processor, i.e. each processor
           calculate the kv segment of one batch for all grids
        3. Collect kv segments and form a complete kv vector for each grid,
           and calculate the grid value by multiplying the complete kv
           vector with GP.alpha

        The resulting grids are also saved to ``grid2_mean_<elements>.npy``
        and ``grid2_var_<elements>.npy`` in the current directory.
        '''
        kernel_info = get_2bkernel(GP)

        processes = mp.cpu_count() if self.n_cpus is None else self.n_cpus

        # ------ construct grids ------
        nop = self.grid_num
        bond_lengths = np.linspace(self.bounds[0][0], self.bounds[1][0], nop)
        bond_means = np.zeros([nop])
        bond_vars = None if self.mean_only \
            else np.zeros([nop, len(GP.alpha)])
        env12 = AtomicEnvironment(self.bond_struc, 0, GP.cutoffs)

        with mp.Pool(processes=processes) as pool:
            # break the training set into pieces
            size = len(GP.training_data)
            block_id, nbatch = partition_c(self.n_sample, size, processes)
            k12_v_all = np.zeros([len(bond_lengths), size * 3])

            pending = []  # async results not collected yet
            base = 0      # batch index of pending[0]
            for ibatch in range(nbatch):
                s, e = block_id[ibatch]
                pending.append(
                    pool.apply_async(self._GenGrid_inner,
                                     args=(GP.name, s, e, bond_lengths,
                                           env12, kernel_info)))
                if len(pending) > processes * 2:
                    for offset, job in enumerate(pending):
                        s, e = block_id[base + offset]
                        k12_v_all[:, s * 3:e * 3] = job.get()
                    pending = []
                    base = ibatch + 1

            for offset, job in enumerate(pending):
                s, e = block_id[base + offset]
                k12_v_all[:, s * 3:e * 3] = job.get()

            pool.close()
            pool.join()

        for b in range(len(bond_lengths)):
            k12_v = k12_v_all[b, :]
            bond_means[b] = np.matmul(k12_v, GP.alpha)
            if not self.mean_only:
                bond_vars[b, :] = solve_triangular(GP.l_mat, k12_v,
                                                   lower=True)

        write_species_name = ''.join(
            "_" + Z_to_element(x) for x in self.bond_struc.coded_species)

        # ------ save mean and var to file -------
        np.save('grid2_mean' + write_species_name, bond_means)
        np.save('grid2_var' + write_species_name, bond_vars)

        return bond_means, bond_vars

    def _GenGrid_inner(self, name, s, e, bond_lengths, env12, kernel_info):
        '''
        Calculate kv segments of the given batch of training data for all
        grids

        Returns an array of shape ``(len(bond_lengths), (e - s) * 3)``.
        '''
        kernel, en_force_kernel, cutoffs, hyps, hyps_mask = kernel_info

        n_entries = (e - s) * 3
        segment = np.zeros([len(bond_lengths), n_entries])
        for row, bond_length in enumerate(bond_lengths):
            env12.bond_array_2 = np.array([[bond_length, 1, 0, 0]])
            segment[row, :] = en_kern_vec(name, s, e, env12,
                                          en_force_kernel, hyps, cutoffs,
                                          hyps_mask)

        return segment

    def build_map_container(self):
        '''
        build 1-d spline function for mean, 2-d for var
        '''
        lower, upper = self.bounds[0], self.bounds[1]

        self.mean = CubicSpline(lower, upper, orders=[self.grid_num])
        if self.mean_only:
            return

        self.var = PCASplines(lower, upper, orders=[self.grid_num],
                              svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        generate the grids from ``GP`` and load them into the splines
        '''
        grid_mean, grid_var = self.GenGrid(GP)

        self.mean.set_values(grid_mean)
        if not self.mean_only:
            self.var.set_values(grid_var)

    def write(self, f, spc):
        '''
        Write LAMMPS coefficient file
        '''
        lower = self.bounds[0][0]
        upper = self.bounds[1][0]
        coefs_2 = self.mean.__coeffs__

        header_2 = '{elem1} {elem2} {a} {b} {order}\n'\
            .format(elem1=Z_to_element(spc[0]), elem2=Z_to_element(spc[1]),
                    a=lower, b=upper, order=self.grid_num)
        f.write(header_2)

        last = len(coefs_2) - 1
        for c, coef in enumerate(coefs_2):
            f.write('{:.10e} '.format(coef))
            # five values per line; no extra break before the final newline
            if c % 5 == 4 and c != last:
                f.write('\n')

        f.write('\n')
class Map3body:
    '''
    3-body mapped GP on a (bond length, bond length, cos angle) grid; the
    GP mean and the vectors L^{-1}k* are stored in spline containers.
    '''

    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='3',
                 svd_rank=0, mean_only=False, load_grid=None, update=True):
        '''
        Build 3-body MGP

        grid_num: grid sizes; index 0 is used for both bond-length axes and
            index 2 for the angle axis
        bounds: ``(lower, upper)`` bounds, unpacked into ``l_bounds`` and
            ``u_bounds``
        cutoffs: cutoffs used to build the atomic environment
        bond_struc: structure the atomic environment is built from
        svd_rank: rank passed to the PCA variance spline
        mean_only: do not build or fill the variance spline
        load_grid: directory holding ``grid3_mean.npy`` / ``grid3_var.npy``;
            grids are generated from the GP when this is falsy
        update: cache kernel vectors per angle under ``kv3/``
        '''
        self.grid_num = grid_num
        self.l_bounds, self.u_bounds = bounds
        self.cutoffs = cutoffs
        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.load_grid = load_grid
        self.update = update

        self.build_map_container()

    def GenGrid(self, GP, processes=mp.cpu_count()):
        '''
        generate grid data of mean prediction and L^{-1}k* for each triplet
        implemented in a parallelized style

        While the grid is computed ``GP.kernel`` and ``GP.hyps`` are
        temporarily replaced by the 3-body kernel and the last three
        hyperparameters; the saved values are put back afterwards, also when
        an error occurs. The grids are saved to ``grid3_mean.npy`` and
        ``grid3_var.npy`` in the current directory.

        Returns ``(bond_means, bond_vars)``.
        '''
        original_kernel = GP.kernel
        original_hyps = np.copy(GP.hyps)

        try:
            # ------ change GP kernel to 3 body ------
            GP.kernel = three_body_mc
            GP.hyps = GP.hyps[-3:]

            # ------ construct grids ------
            nop = self.grid_num[0]
            noa = self.grid_num[2]
            bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0],
                                       nop)
            cos_angles = np.linspace(self.l_bounds[2], self.u_bounds[2], noa)
            bond_means = np.zeros([nop, nop, noa])
            bond_vars = np.zeros([nop, nop, noa, len(GP.alpha)])
            env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

            pool_list = [(i, cos_angles[i], bond_lengths, GP, env12,
                          self.update)
                         for i in range(noa)]

            # the context manager tears the pool down even if a worker fails
            with mp.Pool(processes=processes) as pool:
                if self.update:
                    # start from an empty cache directory
                    if 'kv3' in os.listdir():
                        subprocess.run(['rm', '-r', 'kv3'])
                    subprocess.run(['mkdir', 'kv3'])

                A_list = pool.map(self._GenGrid_inner, pool_list)
                pool.close()
                pool.join()

            for a12 in range(noa):
                bond_means[:, :, a12] = A_list[a12][0]
                bond_vars[:, :, a12, :] = A_list[a12][1]
        finally:
            # ------ change back to original GP ------
            GP.hyps = original_hyps
            GP.kernel = original_kernel

        # ------ save mean and var to file -------
        np.save('grid3_mean', bond_means)
        np.save('grid3_var', bond_vars)

        return bond_means, bond_vars

    def _GenGrid_inner(self, params):
        '''
        generate grid for each angle, used to parallelize grid generation

        params: ``(a12, cos_angle12, bond_lengths, GP, env12, update)``.
        With ``update`` set, kernel vectors are cached in ``kv3/<a12>.npy``;
        element ``[0, 0]`` of that file stores the kernel-vector length and
        row ``1 + b1 * nop + b2`` the vector of grid point ``(b1, b2)``.

        Returns ``(bond_means, bond_vars)`` for this angle; ``bond_vars``
        stays zero when ``mean_only`` is set.
        '''
        a12, cos_angle12, bond_lengths, GP, env12, update = params
        nop = self.grid_num[0]
        bond_means = np.zeros([nop, nop])
        bond_vars = np.zeros([nop, nop, len(GP.alpha)])

        # open saved k vector file, and write to new file
        if update:
            kv_filename = 'kv3/' + str(a12)
            size = len(GP.training_data) * 3
            new_kv_file = np.zeros((nop**2 + 1, size))
            if str(a12) + '.npy' in os.listdir('kv3'):
                old_kv_file = np.load(kv_filename + '.npy')
                last_size = int(old_kv_file[0, 0])
                new_kv_file[:, :last_size] = old_kv_file
            else:
                last_size = 0
            # written after the copy above, which would otherwise put the
            # old length back into the header cell
            new_kv_file[0, 0] = size
            ds = [1, 2, 3]

        for b1, r1 in enumerate(bond_lengths):
            for b2, r2 in enumerate(bond_lengths):
                x2 = r2 * cos_angle12
                y2 = np.sqrt(r2**2 - x2**2)
                r12 = np.linalg.norm(np.array([x2 - r1, y2, 0]))

                env12.bond_array_3 = np.array([[r1, 1, 0, 0],
                                               [r2, 0, 0, 0]])
                env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])

                if update:
                    # k12_v is a view into the cache row, so the entries
                    # computed below are stored in new_kv_file directly;
                    # only those of newly added training data are computed
                    k12_v = new_kv_file[1 + b1 * nop + b2, :]
                    for m_index in range(last_size, size):
                        x_2 = GP.training_data[int(math.floor(m_index / 3))]
                        d_2 = ds[m_index % 3]
                        k12_v[m_index] = GP.kernel(env12, x_2, 1, d_2,
                                                   GP.hyps, GP.cutoffs)
                else:
                    k12_v = GP.get_kernel_vector(env12, 1)

                # calculate mean and var value for the mapping
                bond_means[b1, b2] = np.matmul(k12_v, GP.alpha)
                if not self.mean_only:
                    bond_vars[b1, b2, :] = solve_triangular(GP.l_mat, k12_v,
                                                            lower=True)

        # replace the old file with the new file
        if update:
            np.save(kv_filename, new_kv_file)

        return bond_means, bond_vars

    def build_map_container(self):
        '''
        create the empty spline interpolation objects, with orders
        ``[grid_num[0], grid_num[0], grid_num[2]]``
        '''
        nop = self.grid_num[0]
        noa = self.grid_num[2]
        self.mean = CubicSpline(self.l_bounds, self.u_bounds,
                                orders=[nop, nop, noa])
        if not self.mean_only:
            self.var = PCASplines(self.l_bounds, self.u_bounds,
                                  orders=[nop, nop, noa],
                                  svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        build 3-d spline function for mean,
        3-d for the low rank approximation of L^{-1}k*

        The grids are generated from ``GP`` when ``self.load_grid`` is
        falsy, otherwise read from ``<load_grid>/grid3_mean.npy`` and
        ``<load_grid>/grid3_var.npy``.
        '''
        # Load grid or generate grid values
        if not self.load_grid:
            y_mean, y_var = self.GenGrid(GP)
        else:
            y_mean = np.load(self.load_grid + '/grid3_mean.npy')
            y_var = np.load(self.load_grid + '/grid3_var.npy')

        self.mean.set_values(y_mean)
        if not self.mean_only:
            self.var.set_values(y_var)
class Map3body:
    '''
    3-body mapped GP on a (bond length, bond length, cos angle) grid; the
    GP mean and (unless ``mean_only``) the vectors L^{-1}k* are stored in
    spline containers.
    '''

    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='3',
                 svd_rank=0, mean_only=False, load_grid=None, update=True,
                 n_cpus=1, n_sample=100):
        '''
        Build 3-body MGP

        grid_num: grid sizes; index 0 is used for both bond-length axes and
            index 2 for the angle axis when generating the grid
        bounds: ``(lower, upper)`` bounds, unpacked into ``l_bounds`` and
            ``u_bounds``
        cutoffs: cutoffs used to build the atomic environment
        bond_struc: structure the atomic environment is built from
        svd_rank: rank passed to the PCA variance spline
        mean_only: do not build or fill the variance spline
        load_grid: directory holding ``grid3_mean.npy`` / ``grid3_var.npy``;
            grids are generated from the GP when this is falsy
        update: (re)create an empty ``kv3`` directory before generating
        n_cpus: number of worker processes (``None`` = all cores, 1 = serial)
        n_sample: batch size handed to ``partition_c``
        '''
        self.grid_num = grid_num
        self.l_bounds, self.u_bounds = bounds
        self.cutoffs = cutoffs
        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.load_grid = load_grid
        self.update = update
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        self.build_map_container()

    def GenGrid(self, GP):
        '''
        generate grid data of mean prediction and L^{-1}k* for each triplet
        implemented in a parallelized style

        Delegates to ``GenGrid_serial`` when only one process is requested.
        The grids are saved to ``grid3_mean.npy`` and ``grid3_var.npy`` in
        the current directory.

        Returns ``(bond_means, bond_vars)``; in this parallel path
        ``bond_vars`` is ``None`` when ``mean_only`` is set.
        '''
        import time  # only needed for the progress prints below

        processes = mp.cpu_count() if self.n_cpus is None else self.n_cpus
        if processes == 1:
            return self.GenGrid_serial(GP)

        # ------ get 3body kernel info ------
        kernel_info = get_3bkernel(GP)

        # ------ construct grids ------
        nop = self.grid_num[0]
        noa = self.grid_num[2]
        bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop)
        cos_angles = np.linspace(self.l_bounds[2], self.u_bounds[2], noa)
        bond_means = np.zeros([nop, nop, noa])
        if self.mean_only:
            bond_vars = None
        else:
            bond_vars = np.zeros([nop, nop, noa, len(GP.alpha)])
        env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

        with mp.Pool(processes=processes) as pool:
            if self.update:
                if 'kv3' in os.listdir():
                    os.rmdir('kv3')
                os.mkdir('kv3')

            size = len(GP.training_data)
            block_id, nbatch = partition_c(self.n_sample, size, processes)
            # progress output for large training sets only
            verbose = size > 5000
            if verbose:
                print('parallel set up:', size, self.n_sample, time.time())

            k12_v_all = np.zeros([len(bond_lengths), len(bond_lengths),
                                  len(cos_angles), size * 3])
            pending = []  # async results not collected yet
            base = 0      # batch index of pending[0]
            for ibatch in range(nbatch):
                s, e = block_id[ibatch]
                pending.append(
                    pool.apply_async(self._GenGrid_inner_most,
                                     args=(GP.name, s, e, cos_angles,
                                           bond_lengths, env12,
                                           kernel_info)))
                if verbose:
                    print('send', ibatch, s, e, time.time())
                # collect early once too many results are outstanding
                if len(pending) > processes * 2:
                    for offset, job in enumerate(pending):
                        s, e = block_id[base + offset]
                        k12_v_all[:, :, :, s * 3:e * 3] = job.get()
                        if verbose:
                            print('get', base + offset)
                    pending = []
                    base = ibatch + 1

            for offset, job in enumerate(pending):
                s, e = block_id[base + offset]
                k12_v_all[:, :, :, s * 3:e * 3] = job.get()

            pool.close()
            pool.join()

        for b1, b2, a12 in np.ndindex(nop, nop, noa):
            k12_v = k12_v_all[b1, b2, a12, :]
            bond_means[b1, b2, a12] = np.matmul(k12_v, GP.alpha)
            if not self.mean_only:
                bond_vars[b1, b2, a12, :] = solve_triangular(GP.l_mat, k12_v,
                                                             lower=True)

        # ------ save mean and var to file -------
        np.save('grid3_mean', bond_means)
        np.save('grid3_var', bond_vars)

        return bond_means, bond_vars

    def GenGrid_serial(self, GP):
        '''
        generate grid data of mean prediction and L^{-1}k* for each triplet
        in a single process

        The grids are saved to ``grid3_mean.npy`` and ``grid3_var.npy`` in
        the current directory. Returns ``(bond_means, bond_vars)``; here
        ``bond_vars`` is an all-zero array (not ``None``) when ``mean_only``
        is set.
        '''
        # ------ get 3body kernel info ------
        kernel, _efk, cutoffs, hyps, _hyps_mask = get_3bkernel(GP)

        # ------ construct grids ------
        nop = self.grid_num[0]
        noa = self.grid_num[2]
        bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop)
        cos_angles = np.linspace(self.l_bounds[2], self.u_bounds[2], noa)
        bond_means = np.zeros([nop, nop, noa])
        bond_vars = np.zeros([nop, nop, noa, len(GP.alpha)])
        env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

        if self.update:
            if 'kv3' in os.listdir():
                os.rmdir('kv3')
            os.mkdir('kv3')

        size = len(GP.training_data)
        ds = [1, 2, 3]
        k_v = np.zeros(3)
        k12_v_all = np.zeros([len(bond_lengths), len(bond_lengths),
                              len(cos_angles), size * 3])
        for b1, r1 in enumerate(bond_lengths):
            for b2, r2 in enumerate(bond_lengths):
                for a12, cos_angle12 in enumerate(cos_angles):
                    x2 = r2 * cos_angle12
                    y2 = r2 * np.sqrt(1 - cos_angle12**2)
                    r12 = np.linalg.norm(np.array([x2 - r1, y2, 0]))

                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
                                                   [r2, 0, 0, 0]])
                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])

                    # three kernel-vector entries per training sample
                    for isample, sample in enumerate(GP.training_data):
                        for d in ds:
                            k_v[d - 1] = kernel(env12, sample, 1, d,
                                                hyps, cutoffs)
                        k12_v_all[b1, b2, a12,
                                  isample * 3:isample * 3 + 3] = k_v

        for b1, b2, a12 in np.ndindex(nop, nop, noa):
            k12_v = k12_v_all[b1, b2, a12, :]
            bond_means[b1, b2, a12] = np.matmul(k12_v, GP.alpha)
            if not self.mean_only:
                bond_vars[b1, b2, a12, :] = solve_triangular(GP.l_mat, k12_v,
                                                             lower=True)

        # ------ save mean and var to file -------
        np.save('grid3_mean', bond_means)
        np.save('grid3_var', bond_vars)

        return bond_means, bond_vars

    def _GenGrid_inner_most(self, name, s, e, cos_angles, bond_lengths,
                            env12, kernel_info):
        '''
        generate the kernel-vector segment of one training batch for every
        grid point, used to parallelize grid generation

        Returns an array of shape ``(len(bond_lengths), len(bond_lengths),
        len(cos_angles), (e - s) * 3)``. ``env12.bond_array_3`` and
        ``env12.cross_bond_dists`` are overwritten for every grid point.
        '''
        kernel, _efk, cutoffs, hyps, hyps_mask = kernel_info

        size = (e - s) * 3
        k12_v = np.zeros([len(bond_lengths), len(bond_lengths),
                          len(cos_angles), size])
        for a12, cos_angle12 in enumerate(cos_angles):
            for b1, r1 in enumerate(bond_lengths):
                for b2, r2 in enumerate(bond_lengths):
                    x2 = r2 * cos_angle12
                    y2 = r2 * np.sqrt(1 - cos_angle12**2)
                    r12 = np.linalg.norm(np.array([x2 - r1, y2, 0]))

                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
                                                   [r2, 0, 0, 0]])
                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])

                    k12_v[b1, b2, a12, :] = \
                        get_kernel_vector_unit(name, s, e, env12, 1,
                                               kernel, hyps, cutoffs,
                                               hyps_mask)

        return k12_v

    def build_map_container(self):
        '''
        create the empty spline interpolation objects for the mean and,
        unless ``mean_only`` is set, the variance
        '''
        self.mean = CubicSpline(self.l_bounds, self.u_bounds,
                                orders=self.grid_num)
        if not self.mean_only:
            self.var = PCASplines(self.l_bounds, self.u_bounds,
                                  orders=self.grid_num,
                                  svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        build 3-d spline function for mean,
        3-d for the low rank approximation of L^{-1}k*

        The grids are generated from ``GP`` when ``self.load_grid`` is
        falsy, otherwise read from ``<load_grid>/grid3_mean.npy`` and
        ``<load_grid>/grid3_var.npy``.

        Raises AssertionError when ``GP.multihyps`` is not ``False``.
        '''
        assert (GP.multihyps is False), "multihyps is not supported in mgp"

        # Load grid or generate grid values
        if not self.load_grid:
            y_mean, y_var = self.GenGrid(GP)
        else:
            y_mean = np.load(self.load_grid + '/grid3_mean.npy')
            y_var = np.load(self.load_grid + '/grid3_var.npy')

        self.mean.set_values(y_mean)
        if not self.mean_only:
            self.var.set_values(y_var)

    def write(self, f, spc):
        '''
        Write the mean-spline coefficients for this triplet to ``f``.

        A header line (three element names, lower bounds, upper bounds and
        the three orders) is followed by the coefficients, five per line.
        '''
        a = self.l_bounds
        b = self.u_bounds
        order = self.grid_num

        coefs_3 = self.mean.__coeffs__

        elem1 = Z_to_element(spc[0])
        elem2 = Z_to_element(spc[1])
        elem3 = Z_to_element(spc[2])

        header_3 = '{elem1} {elem2} {elem3} {a1} {a2} {a3} {b1}'\
                   ' {b2} {b3:.10e} {order1} {order2} {order3}\n'\
            .format(elem1=elem1, elem2=elem2, elem3=elem3,
                    a1=a[0], a2=a[1], a3=a[2],
                    b1=b[0], b2=b[1], b3=b[2],
                    order1=order[0], order2=order[1], order3=order[2])
        f.write(header_3)

        n = 0
        for i in range(coefs_3.shape[0]):
            for j in range(coefs_3.shape[1]):
                for k in range(coefs_3.shape[2]):
                    f.write('{:.10e} '.format(coefs_3[i, j, k]))
                    # line break after every fifth coefficient
                    if n % 5 == 4:
                        f.write('\n')
                    n += 1

        f.write('\n')
class Map2body:
    '''
    Mapped 2-body model for one bond type: a spline for the mean prediction
    and, unless ``mean_only`` is set, a PCA-compressed spline for the
    ``L^{-1} k*`` vectors used to predict the variance.
    '''

    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='2',
                 svd_rank=0, mean_only=False):
        '''
        Build 2-body MGP

        Args:
            grid_num: number of grid points along the bond-length axis.
            bounds: pair ``(lower, upper)``; element 0 of each is used as the
                range of the bond-length grid.
            cutoffs: cutoffs handed to ``AtomicEnvironment`` when the probe
                environment is created in ``GenGrid``.
            bond_struc: structure describing the bond; its ``coded_species``
                attribute is stored as ``self.species``.
            bodies: label of the interaction order (stored, not interpreted
                here).
            svd_rank: rank passed to ``PCASplines`` for the variance map.
            mean_only: if True, no variance container is built or filled.
        '''
        self.grid_num = grid_num
        self.l_bounds, self.u_bounds = bounds
        self.cutoffs = cutoffs
        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only

        self.build_map_container()

    def GenGrid(self, GP, processes=mp.cpu_count()):
        '''
        Generate the grid data of the mean prediction and L^{-1}k* for each
        bond length, one grid point per task of a multiprocessing pool.

        The GP is temporarily switched to a pure 2-body model (kernel, first
        cutoff, hyps[0], hyps[1] and hyps[-1]). The original settings are
        restored even if grid generation fails, so the caller's GP is never
        left in the truncated state.

        Args:
            GP: Gaussian process object; ``kernel``, ``cutoffs``, ``hyps``,
                ``alpha``, ``l_mat`` and ``get_kernel_vector`` are used.
            processes: number of worker processes in the pool.

        Returns:
            (bond_means, bond_vars): arrays of shape ``(grid_num,)`` and
            ``(grid_num, len(GP.alpha))``.
        '''
        # ------ remember the original GP settings ------
        original_kernel = GP.kernel
        original_cutoffs = np.copy(GP.cutoffs)
        original_hyps = np.copy(GP.hyps)

        try:
            # ------ change GP kernel to 2 body ------
            GP.kernel = two_body_mc
            GP.cutoffs = [GP.cutoffs[0]]
            GP.hyps = [GP.hyps[0], GP.hyps[1], GP.hyps[-1]]

            # ------ construct grids ------
            nop = self.grid_num
            bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop)
            bond_means = np.zeros([nop])
            bond_vars = np.zeros([nop, len(GP.alpha)])
            env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

            pool_list = [(i, bond_lengths, GP, env12) for i in range(nop)]
            pool = mp.Pool(processes=processes)
            try:
                A_list = pool.map(self._GenGrid_inner, pool_list)
            finally:
                # always release the worker processes
                pool.close()
                pool.join()

            for p, (mean_p, var_p) in enumerate(A_list):
                bond_means[p] = mean_p
                bond_vars[p, :] = var_p
        finally:
            # ------ change back original GP ------
            GP.cutoffs = original_cutoffs
            GP.hyps = original_hyps
            GP.kernel = original_kernel

        return bond_means, bond_vars

    def _GenGrid_inner(self, params):
        '''
        Evaluate one grid point; used to parallelize grid generation.

        Args:
            params: tuple ``(b, bond_lengths, GP, env12)`` where ``b`` is the
                index of the grid point in ``bond_lengths``.

        Returns:
            (mean, var_vec): ``k* . alpha`` and ``L^{-1} k*`` for the grid
            point; ``var_vec`` is all zeros when ``mean_only`` is set.
        '''
        b, bond_lengths, GP, env12 = params
        r = bond_lengths[b]

        # the probe environment holds a single bond of length r along x
        env12.bond_array_2 = np.array([[r, 1, 0, 0]])

        k12_v = GP.get_kernel_vector(env12, 1)
        bond_means = np.matmul(k12_v, GP.alpha)

        if self.mean_only:
            bond_vars = np.zeros(k12_v.shape)
        else:
            bond_vars = solve_triangular(GP.l_mat, k12_v, lower=True)

        return bond_means, bond_vars

    def build_map_container(self):
        '''
        Create the (still empty) spline objects for the mean and, unless
        ``mean_only`` is set, for the variance.
        '''
        self.mean = CubicSpline(self.l_bounds, self.u_bounds,
                                orders=[self.grid_num])

        if not self.mean_only:
            self.var = PCASplines(self.l_bounds, self.u_bounds,
                                  orders=[self.grid_num],
                                  svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        build 1-d spline function for mean, 2-d for var
        '''
        y_mean, y_var = self.GenGrid(GP)
        self.mean.set_values(y_mean)
        if not self.mean_only:
            self.var.set_values(y_var)
class SingleMapXbody:
    """
    Base class of the mapped model for a single species combination.

    The class owns a spline for the mean prediction (``self.mean``) and,
    depending on ``var_map``, a spline for the variance (``self.var``).
    Several attributes and methods used below are not defined here and have
    to be provided by the child class: ``bodies``, ``species_code``,
    ``kernel_name``, ``grid_dim``, ``set_bounds``, ``construct_grids``,
    ``grid_cutoff``, ``get_grid_kernel`` and ``get_self_kernel``.
    """

    def __init__(
        self,
        grid_num: int = 1,
        bounds="auto",
        species: list = None,
        svd_rank=0,
        var_map: str = None,
        load_grid=None,
        lower_bound_relax=0.1,
        n_cpus: int = None,
        n_sample: int = 100,
        **kwargs,
    ):
        """
        Args:
            grid_num: number of grid points (per dimension) of the splines.
            bounds: either the string "auto" (both bounds are determined
                later from the GP) or a pair ``[lower, upper]`` in which each
                entry is an explicit bound or the string "auto".
            species: species of this map; defaults to a fresh empty list.
            svd_rank: integer rank for the PCA variance map, or "auto".
            var_map: None (no variance), "pca" or "simple".
            load_grid: directory containing an ``mgp_grids`` folder to load
                grid values from instead of computing them.
            lower_bound_relax: margin subtracted from the smallest training
                distance when the lower bound is reset.
            n_cpus: number of processes for grid generation; None means all.
            n_sample: batch size handed to ``partition_vector``.
            **kwargs: accepted and ignored.
        """
        self.grid_num = grid_num
        self.bounds = deepcopy(bounds)
        # a fresh list per instance, never a shared default object
        self.species = [] if species is None else species
        self.svd_rank = svd_rank
        self.var_map = var_map
        self.load_grid = load_grid
        self.lower_bound_relax = lower_bound_relax
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        # bounds="auto" as a whole means that both bounds are automatic;
        # indexing the string would yield the characters "a" and "u"
        if isinstance(bounds, str) and bounds == "auto":
            lower_spec = upper_spec = "auto"
        else:
            lower_spec, upper_spec = bounds[0], bounds[1]

        self.auto_lower = self._is_auto(lower_spec)
        lower_bound = None if self.auto_lower else lower_spec

        self.auto_upper = self._is_auto(upper_spec)
        upper_bound = None if self.auto_upper else upper_spec

        self.set_bounds(lower_bound, upper_bound)

        self.hyps_mask = None

        # the containers can only be built once both bounds are known
        if not self.auto_lower and not self.auto_upper:
            self.build_map_container()

    @staticmethod
    def _is_auto(bound):
        """Return True only if ``bound`` is the string "auto".

        The type check avoids comparing an array with a string, which is
        elementwise in NumPy and ambiguous in a boolean context.
        """
        return isinstance(bound, str) and bound == "auto"

    def set_bounds(self, lower_bound, upper_bound):
        raise NotImplementedError("need to be implemented in child class")

    def construct_grids(self):
        raise NotImplementedError("need to be implemented in child class")

    def LoadGrid(self):
        """
        Load the grid values stored by a previous ``GenGrid`` call.

        Returns:
            (grid_mean, grid_vars) as read from
            ``<load_grid>/mgp_grids/<bodies>_<species_code>_{mean,var}.npy``.

        Raises:
            FileNotFoundError: if ``load_grid`` has no ``mgp_grids`` entry.
        """
        if "mgp_grids" not in os.listdir(self.load_grid):
            raise FileNotFoundError(
                "Please set 'load_grid' as the location of mgp_grids folder")

        grid_path = f"{self.load_grid}/mgp_grids/{self.bodies}_{self.species_code}"
        grid_mean = np.load(f"{grid_path}_mean.npy")
        # NOTE(review): allow_pickle is needed because the variance file holds
        # a pickled None when var_map is None; only load trusted grid files.
        grid_vars = np.load(f"{grid_path}_var.npy", allow_pickle=True)
        return grid_mean, grid_vars

    def GenGrid(self, GP):
        """
        To use GP to predict value on each grid point, we need to generate
        the kernel vector kv whose length is the same as the training set
        size.

        1. We divide the training set into several batches, corresponding to
           different segments of kv
        2. Distribute each batch to a processor, i.e. each processor
           calculate the kv segment of one batch for all grids
        3. Collect kv segments and form a complete kv vector for each grid,
           and calculate the grid value by multiplying the complete kv
           vector with GP.alpha

        If ``load_grid`` is set, the stored grids are returned instead.
        Otherwise the results are also saved to ``./mgp_grids``.

        Returns:
            (grid_mean, grid_vars): ``grid_mean`` is reshaped to
            ``grid_num``; ``grid_vars`` is None when ``var_map`` is None.
            Without any training data, a flat zero array and None are
            returned after a warning.
        """
        if self.load_grid is not None:
            return self.LoadGrid()

        processes = mp.cpu_count() if self.n_cpus is None else self.n_cpus

        # computed first: the empty-training-set branch below needs it
        n_grid = np.prod(self.grid_num)

        # -------- get training data info ----------
        n_envs = len(GP.training_data)
        n_strucs = len(GP.training_structures)

        if n_envs == 0 and n_strucs == 0:
            warnings.warn("No training data, will return 0")
            return np.zeros([n_grid]), None

        # ------- call gengrid functions ---------------
        kernel_info = get_kernel_term(self.kernel_name, GP.hyps_mask, GP.hyps)
        args = [GP.name, kernel_info]

        k12_v_force = self._gengrid_par(args, True, n_envs, processes)
        k12_v_energy = self._gengrid_par(args, False, n_strucs, processes)

        k12_v_all = np.hstack([k12_v_force, k12_v_energy])
        # the two halves can be large; free them before the solves below
        del k12_v_force
        del k12_v_energy

        # ------- compute bond means and variances ---------------
        grid_mean = np.reshape(k12_v_all @ GP.alpha, self.grid_num)

        grid_vars = None
        if self.var_map is not None:
            grid_vars = solve_triangular(GP.l_mat, k12_v_all.T, lower=True).T

            if self.var_map == "simple":
                self_kern = self._gengrid_var_simple(kernel_info)
                grid_vars = np.sqrt(self_kern - np.sum(grid_vars**2, axis=1))
                grid_vars = np.expand_dims(grid_vars, axis=1)

            tensor_shape = np.array([*self.grid_num, grid_vars.shape[1]])
            grid_vars = np.reshape(grid_vars, tensor_shape)

        # ------ save mean and var to file -------
        os.makedirs("mgp_grids", exist_ok=True)

        grid_path = f"mgp_grids/{self.bodies}_{self.species_code}"
        np.save(f"{grid_path}_mean", grid_mean)
        np.save(f"{grid_path}_var", grid_vars)

        return grid_mean, grid_vars

    def _gengrid_par(self, args, force_block, n_envs, processes):
        """
        Compute the kernel vectors between all grid points and ``n_envs``
        training entries, split into batches over a process pool.

        Args:
            args: ``[gp_name, kernel_info]``, forwarded to
                ``_gengrid_inner``.
            force_block: True for the force (atomic environment) part of the
                training set, False for the energy (structure) part.
            n_envs: number of training entries of that part.
            processes: number of processes; 1 runs in the current process.

        Returns:
            2-d array with one row per grid point; it has zero columns when
            ``n_envs`` is 0.
        """
        if n_envs == 0:
            n_grid = np.prod(self.grid_num)
            return np.empty((n_grid, 0))

        gengrid_func = self._gengrid_inner
        if processes == 1:
            return gengrid_func(*args, force_block, 0, n_envs)

        with mp.Pool(processes=processes) as pool:
            block_id, nbatch = partition_vector(self.n_sample, n_envs, processes)

            pending = []
            for ibatch in range(nbatch):
                s, e = block_id[ibatch]
                pending.append(
                    pool.apply_async(gengrid_func,
                                     args=args + [force_block, s, e]))

            # collect in submission order so the columns stay aligned with
            # the training set
            k12_matrix = [job.get() for job in pending]
            pool.close()
            pool.join()

        return np.hstack(k12_matrix)

    def _grid_cutoff_terms(self, r_cut):
        """Return the grid points, the cutoff values on them and the first
        column of the cutoff derivatives (kept 2-d)."""
        grids = self.construct_grids()
        coords = np.zeros((grids.shape[0], self.grid_dim * 3), dtype=np.float64)
        coords[:, 0] = 1.0  # every other coordinate stays padded with 0

        fj, fdj = self.grid_cutoff(grids, r_cut, coords, derivative=True,
                                   cutoff_func=cf.quadratic_cutoff)
        return grids, fj, fdj[:, [0]]

    def _gengrid_inner(self, name, kernel_info, force_block, s, e):
        """
        Loop over different parts of the training set. from element s to
        element e

        Args:
            name: name of the gp instance
            kernel_info: 4-tuple as returned by ``get_kernel_term``; its
                second entry holds the cutoffs
            force_block: True to use the training environments with the
                "energy_force" kernel, False to use the training structures
                with the "energy_energy" kernel
            s: start index of the training data partition
            e: end index of the training data partition

        Returns:
            2-d array with one row per grid point. If any upper bound is
            non-positive the result is all zeros with ``(e - s) * 3`` columns
            for a force block and ``e - s`` columns otherwise.
        """
        _, cutoffs, _hyps, _hyps_mask = kernel_info
        r_cut = cutoffs[self.kernel_name]

        n_grids = np.prod(self.grid_num)

        if np.any(np.array(self.bounds[1]) <= 0.0):
            n_cols = (e - s) * 3 if force_block else e - s
            return np.zeros((n_grids, n_cols))

        grids, fj, fdj = self._grid_cutoff_terms(r_cut)

        if force_block:
            training_data = _global_training_data[name]
            kern_type = "energy_force"
        else:
            training_data = _global_training_structures[name]
            kern_type = "energy_energy"

        # evaluate the grid in chunks to bound the size of the intermediates
        chunk_size = 32**3
        n_chunk = max(1, ceil(n_grids / chunk_size))
        chunk_ranges = [
            (chunk_size * g, min(chunk_size * (g + 1), n_grids))
            for g in range(n_chunk)
        ]

        k_v = []
        for m_index in range(s, e):
            data = training_data[m_index]
            kern_vec = [
                self.get_grid_kernel(
                    kern_type,
                    data,
                    kernel_info,
                    grids[gs:ge, :],
                    fj[gs:ge, :],
                    fdj[gs:ge, :],
                )
                for gs, ge in chunk_ranges
            ]
            k_v.append(np.hstack(kern_vec))

        if k_v:
            return np.vstack(k_v).T
        return np.zeros((n_grids, 0))

    def _gengrid_var_simple(self, kernel_info):
        """
        Generate grids for variance upper bound, based on the inequality:
            V(c, p)^2 <= V(c, c) V(p, p)
        where c, p are two bonds/triplets or environments
        """
        _, cutoffs, _hyps, _hyps_mask = kernel_info
        r_cut = cutoffs[self.kernel_name]

        grids, fj, fdj = self._grid_cutoff_terms(r_cut)
        return self.get_self_kernel(kernel_info, grids, fj, fdj)

    def build_map_container(self):
        """
        Create the (still empty) spline containers for the mean and, if
        requested, the variance.

        If any upper bound is non-positive, placeholder bounds of zeros and
        ones are used so that the splines can still be constructed. The PCA
        container is not built while ``svd_rank`` is "auto".
        """
        if np.any(np.array(self.bounds[1]) <= 0.0):
            bounds = [
                np.zeros_like(self.bounds[0]),
                np.ones_like(self.bounds[1]),
            ]
        else:
            bounds = self.bounds

        self.mean = CubicSpline(bounds[0], bounds[1], orders=self.grid_num)

        if self.var_map == "pca":
            if self.svd_rank == "auto":
                warnings.warn(
                    "The containers for variance are not built because svd_rank='auto'"
                )
            elif isinstance(self.svd_rank, int):
                self.var = PCASplines(
                    bounds[0],
                    bounds[1],
                    orders=self.grid_num,
                    svd_rank=self.svd_rank,
                )

        if self.var_map == "simple":
            self.var = CubicSpline(bounds[0], bounds[1], orders=self.grid_num)

    def update_bounds(self, GP):
        """
        Bring the bounds and containers in line with the current GP.

        The containers are rebuilt if the GP's ``hyps_mask`` differs from the
        stored one, if the training data contain a distance below the lower
        bound, or if an automatic upper bound is exceeded by the GP cutoffs.
        """
        rebuild_container = False

        # double check the container and the GP is consistent
        if not Parameters.compare_dict(GP.hyps_mask, self.hyps_mask):
            rebuild_container = True

        lower_bound = self.bounds[0]
        min_dist = self.search_lower_bound(GP)
        # change lower bound only when there appears a smaller distance
        if lower_bound is None or min_dist < np.max(lower_bound):
            lower_bound = np.max((min_dist - self.lower_bound_relax, 0.0))
            rebuild_container = True

            warnings.warn(
                "The minimal distance in training data is lower than "
                f"the current lower bound, will reset lower bound to {lower_bound}"
            )

        upper_bound = self.bounds[1]
        if self.auto_upper or upper_bound is None:
            gp_cutoffs = Parameters.get_cutoff(self.kernel_name, self.species,
                                               GP.hyps_mask)
            if upper_bound is None or np.any(gp_cutoffs > upper_bound):
                upper_bound = gp_cutoffs
                rebuild_container = True

        if rebuild_container:
            self.set_bounds(lower_bound, upper_bound)
            self.build_map_container()

    def build_map(self, GP):
        """
        Update the bounds, generate (or load) the grid values and fit the
        mean and variance splines to them.
        """
        self.update_bounds(GP)

        y_mean, y_var = self.GenGrid(GP)

        self.mean.set_values(y_mean)

        # with svd_rank="auto" the PCA container is created only now, because
        # its rank is taken from the shape of the variance grid
        if self.var_map == "pca" and self.svd_rank == "auto":
            self.var = PCASplines(
                self.bounds[0],
                self.bounds[1],
                orders=self.grid_num,
                svd_rank=np.min(y_var.shape),
            )

        if self.var_map is not None:
            self.var.set_values(y_var)

        # remember which hyperparameter mask the map was built with
        self.hyps_mask = deepcopy(GP.hyps_mask)

    def __str__(self):
        header = [
            f"{self.__class__.__name__}",
            f"        species: {self.species}",
            f"        lower bound: {self.bounds[0]}, auto_lower = {self.auto_lower}",
            f"        upper bound: {self.bounds[1]}, auto_upper = {self.auto_upper}",
            f"        grid num: {self.grid_num}",
            f"        lower bound relaxation: {self.lower_bound_relax}",
            f"        load grid from: {self.load_grid}",
        ]
        info = "\n".join(header) + "\n"

        if self.var_map is None:
            info += "        without variance\n"
        elif self.var_map == "pca":
            info += f"        with PCA variance, svd_rank = {self.svd_rank}\n"
        elif self.var_map == "simple":
            info += "        with simple variance"

        return info

    def search_lower_bound(self, GP):
        """
        If the lower bound is set to be 'auto', search the minimal
        interatomic distances in the training set of GP.

        Returns:
            The smallest first-bond distance over all training environments
            and training structures, capped by the smallest GP cutoff.
        """
        upper_bound = Parameters.get_cutoff(self.kernel_name, self.species,
                                            GP.hyps_mask)
        lower_bound = np.min(upper_bound)

        # assumes bond_array_2 is sorted by distance, so that entry [0][0]
        # is the shortest bond of the environment - TODO confirm
        for env in _global_training_data[GP.name]:
            if len(env.bond_array_2) == 0:
                continue
            min_dist = env.bond_array_2[0][0]
            if min_dist < lower_bound:
                lower_bound = min_dist

        for struc in _global_training_structures[GP.name]:
            for env in struc:
                if len(env.bond_array_2) == 0:
                    continue
                min_dist = env.bond_array_2[0][0]
                if min_dist < lower_bound:
                    lower_bound = min_dist

        return lower_bound

    def predict(self, lengths, xyzs):
        """
        predict force and variance contribution of one component

        Args:
            lengths: 2-d array-like of distances, indexed ``[sample, bond]``.
            xyzs: 3-d array-like indexed ``[sample, bond, xyz]``.

        Returns:
            (f, vir, v, e): force (3 components), virial (6 components in
            the ASE order), variance contribution and energy.

        Raises:
            ValueError: if a distance is below the lower bound of the map.
            Exception: if a distance is above the upper bound of the map.
        """
        min_dist = np.min(lengths)
        if min_dist < np.max(self.bounds[0]):
            raise ValueError(
                self.species,
                min_dist,
                f"The minimal distance {min_dist:.3f}"
                f" is below the mgp lower bound {self.bounds[0]}",
            )

        max_dist = np.max(lengths)
        if max_dist > np.min(self.bounds[1]):
            raise Exception(
                self.species,
                max_dist,
                "The atomic environment should have cutoff smaller than the GP cutoff",
            )

        lengths = np.array(lengths)
        xyzs = np.array(xyzs)

        n_neigh = self.bodies - 1

        # predict forces and energy
        e_0, f_0 = self.mean(lengths, with_derivatives=True)
        e = np.sum(e_0)  # energy

        f_d = np.zeros((lengths.shape[0], n_neigh, 3))
        for b in range(n_neigh):
            # scale each row of xyzs by its derivative; same result as
            # multiplying with a diagonal matrix, without the N x N temporary
            f_d[:, b, :] = f_0[:, b, 0][:, np.newaxis] * xyzs[:, b]
        f = self.bodies * np.sum(f_d, axis=(0, 1))

        # predict var
        v = 0
        if self.var_map == "simple":
            v_0 = self.var(lengths)
            v = np.sum(v_0)
        elif self.var_map == "pca":
            v_0 = self.var(lengths)
            v_0 = np.sum(v_0, axis=1)
            v_0 = np.expand_dims(v_0, axis=1)
            v = self.var.V @ v_0

        # predict virial stress
        vir = np.zeros(6)
        vir_order = (
            (0, 0),
            (1, 1),
            (2, 2),
            (1, 2),
            (0, 2),
            (0, 1),
        )  # match the ASE order
        for i, (row, col) in enumerate(vir_order):
            for b in range(n_neigh):
                vir_i = f_d[:, b, row] * xyzs[:, b, col] * lengths[:, b]
                vir[i] += np.sum(vir_i)

        vir *= self.bodies / 2
        return f, vir, v, e

    def write(self, f, write_var, permute=False):
        """
        Write LAMMPS coefficient file

        This implementation only works for 2b and 3b. User should
        implement overload in the actual class if the new kernel
        has different coefficient format

        In the future, it should be changed to writing in bin/hex
        instead of decimal

        Args:
            f: writable text stream.
            write_var: write the coefficients of the variance spline instead
                of those of the mean spline.
            permute: not used by this implementation.
        """
        # write header
        elems = self.species_code.split("_")

        a = self.bounds[0]
        b = self.bounds[1]
        order = self.grid_num

        # str() instead of repr(): NumPy >= 2 renders scalars as
        # "np.float64(...)" in repr, which is not a valid number in the file
        header = " ".join(elems)
        header += " " + " ".join(map(str, a))
        header += " " + " ".join(map(str, b))
        header += " " + " ".join(map(str, order))
        f.write(header + "\n")

        # write coeffs
        if write_var:
            coefs = self.var.__coeffs__
        else:
            coefs = self.mean.__coeffs__

        self.write_flatten_coeff(f, coefs)

    def write_flatten_coeff(self, f, coefs):
        """
        flatten the coefficient and write it as a block. each line has no
        more than 5 element. Every value is preceded by a space and written
        with the default string conversion of its scalar type.
        """
        coefs = coefs.reshape([-1])
        last = len(coefs) - 1
        for c, coef in enumerate(coefs):
            # str() keeps the plain decimal form on every NumPy version
            f.write(" " + str(coef))
            if c % 5 == 4 and c != last:
                f.write("\n")

        f.write("\n")