コード例 #1
0
    def build_map(self, GP):
        """
        Fill the mean (and, if enabled, variance) spline containers.

        Grid values are generated from ``GP`` through ``self.GenGrid`` unless
        ``self.load_grid`` is set, in which case they are read from the
        ``mgp_grids`` folder located inside that directory.

        Args:
            GP: Gaussian process object. It is passed to ``update_bounds``
                and ``GenGrid``; its ``hyps_mask`` is deep-copied onto
                ``self.hyps_mask``.

        Raises:
            FileNotFoundError: if ``self.load_grid`` is set but contains no
                ``mgp_grids`` entry.
        """
        self.update_bounds(GP)

        if not self.load_grid:
            y_mean, y_var = self.GenGrid(GP)
        else:
            if "mgp_grids" not in os.listdir(self.load_grid):
                raise FileNotFoundError(
                    "Please set 'load_grid' as the location of mgp_grids folder"
                )

            grid_path = f"{self.load_grid}/mgp_grids/{self.bodies}_{self.species_code}"
            y_mean = np.load(f"{grid_path}_mean.npy")
            # allow_pickle lets a variance file that holds a pickled object
            # (rather than a plain numeric array) be read back.
            y_var = np.load(f"{grid_path}_var.npy", allow_pickle=True)

        self.mean.set_values(y_mean)

        if self.var_map == "pca" and self.svd_rank == "auto":
            # "auto" keeps every component. The variance grid is treated as a
            # (number of grid points) x (last axis) matrix, whose rank is at
            # most the smaller of those two sizes. Taking the minimum over
            # every axis of y_var instead would cap the rank at the smallest
            # single grid dimension for multi-dimensional grids.
            # NOTE(review): assumes PCASplines flattens the leading grid axes
            # before its decomposition -- confirm.
            n_grid_points = np.prod(y_var.shape[:-1])
            full_rank = np.min((n_grid_points, y_var.shape[-1]))
            self.var = PCASplines(
                self.bounds[0],
                self.bounds[1],
                orders=self.grid_num,
                svd_rank=full_rank,
            )

        if self.var_map is not None:
            self.var.set_values(y_var)

        # Keep an independent copy so later changes to GP do not leak in.
        self.hyps_mask = deepcopy(GP.hyps_mask)
コード例 #2
0
ファイル: mapxb.py プロジェクト: mailhexu/flare
    def build_map(self, GP):
        """
        Generate grid values from ``GP`` and load them into the splines.

        The mean container always receives the generated mean grid. When
        ``self.var_map`` is ``"pca"`` a new ``PCASplines`` container is
        created here, using either the full rank (``svd_rank == "auto"``) or
        the user-supplied integer rank. Whenever ``self.var_map`` is not
        ``None`` the variance container receives the generated variance grid.
        Finally ``GP.hyps_mask`` is deep-copied onto ``self``.
        """
        self.update_bounds(GP)

        grid_mean, grid_var = self.GenGrid(GP)
        self.mean.set_values(grid_mean)

        if self.var_map == "pca":
            # Largest usable rank: the smaller of the number of grid points
            # (product of all leading axes) and the size of the last axis.
            n_grid_points = np.prod(grid_var.shape[:-1])
            full_rank = np.min((n_grid_points, grid_var.shape[-1]))

            if self.svd_rank == "auto":
                rank = full_rank
            else:
                assert isinstance(self.svd_rank, int), "Please set svd_rank to int or 'auto'"
                assert self.svd_rank <= full_rank, f"svd_rank={self.svd_rank} exceeds full_rank={full_rank}"
                rank = self.svd_rank

            lower, upper = self.bounds[0], self.bounds[1]
            self.var = PCASplines(lower, upper, orders=self.grid_num, svd_rank=rank)

        if self.var_map is not None:
            self.var.set_values(grid_var)

        self.hyps_mask = deepcopy(GP.hyps_mask)
コード例 #3
0
    def build_map_container(self):
        """
        Create the (still empty) spline containers.

        ``self.mean`` is always a ``CubicSpline``. The variance container
        depends on ``self.var_map``: ``"pca"`` with an integer ``svd_rank``
        gives a ``PCASplines``, ``"pca"`` with ``svd_rank == "auto"`` only
        emits a warning and builds nothing, and ``"simple"`` gives a second
        ``CubicSpline``.
        """
        lower, upper = self.bounds[0], self.bounds[1]
        self.mean = CubicSpline(lower, upper, orders=self.grid_num)

        if self.var_map == "pca":
            if isinstance(self.svd_rank, int):
                self.var = PCASplines(
                    lower, upper, orders=self.grid_num, svd_rank=self.svd_rank
                )
            elif self.svd_rank == "auto":
                # The rank is not known yet, so no container can be sized.
                warnings.warn(
                    "The containers for variance are not built because svd_rank='auto'"
                )

        if self.var_map == "simple":
            self.var = CubicSpline(lower, upper, orders=self.grid_num)
コード例 #4
0
ファイル: mapxb.py プロジェクト: aaronchen0316/flare
    def build_map_container(self):
        """
        Create the (still empty) spline containers.

        If any entry of the upper bound is non-positive, placeholder bounds
        (zeros for the lower bound, ones for the upper bound) are used in
        place of ``self.bounds``. ``self.mean`` is always a ``CubicSpline``;
        the variance container is chosen by ``self.var_map`` (``"pca"`` with
        an integer rank -> ``PCASplines``, ``"simple"`` -> ``CubicSpline``).
        """
        upper_is_unusable = np.any(np.array(self.bounds[1]) <= 0.0)
        if upper_is_unusable:
            lower = np.zeros_like(self.bounds[0])
            upper = np.ones_like(self.bounds[1])
        else:
            lower, upper = self.bounds[0], self.bounds[1]

        self.mean = CubicSpline(lower, upper, orders=self.grid_num)

        if self.var_map == "pca":
            if isinstance(self.svd_rank, int):
                self.var = PCASplines(
                    lower, upper, orders=self.grid_num, svd_rank=self.svd_rank
                )
            elif self.svd_rank == "auto":
                # The rank is not known yet, so no container can be sized.
                warnings.warn(
                    "The containers for variance are not built because svd_rank='auto'"
                )

        if self.var_map == "simple":
            self.var = CubicSpline(lower, upper, orders=self.grid_num)
コード例 #5
0
ファイル: mgp.py プロジェクト: smheidrich/flare
    def build_map_container(self):
        """Create the mean spline and, unless ``mean_only``, the variance splines."""
        lower, upper = self.l_bounds, self.u_bounds
        self.mean = CubicSpline(lower, upper, orders=[self.grid_num])

        if self.mean_only:
            return

        self.var = PCASplines(
            lower, upper, orders=[self.grid_num], svd_rank=self.svd_rank
        )
コード例 #6
0
    def build_map_container(self):
        """Create the mean spline and, unless ``mean_only``, the variance splines."""
        lower, upper = self.l_bounds, self.u_bounds
        self.mean = CubicSpline(lower, upper, orders=self.grid_num)

        if self.mean_only:
            return

        self.var = PCASplines(
            lower, upper, orders=self.grid_num, svd_rank=self.svd_rank
        )
コード例 #7
0
    def build_map_container(self):
        '''
        Create the spline container for the mean and, unless ``mean_only``
        is set, the PCA spline container for the variance.
        '''
        lower, upper = self.bounds[0], self.bounds[1]
        self.mean = CubicSpline(lower, upper, orders=[self.grid_num])

        if self.mean_only:
            return

        self.var = PCASplines(
            lower, upper, orders=[self.grid_num], svd_rank=self.svd_rank
        )
コード例 #8
0
    def build_map_container(self):
        '''
        Create the spline container for the mean and, unless ``mean_only``
        is set, the PCA spline container used for the low rank
        approximation of L^{-1}k*.
        '''
        lower, upper = self.bounds[0], self.bounds[1]
        self.mean = CubicSpline(lower, upper, orders=self.grid_num)

        if self.mean_only:
            return

        self.var = PCASplines(
            lower, upper, orders=self.grid_num, svd_rank=self.svd_rank
        )
コード例 #9
0
ファイル: mgp.py プロジェクト: mayankaditya/flare
    def build_map_container(self):
        '''
        Create the spline container for the mean and, unless ``mean_only``
        is set, the PCA spline container used for the low rank
        approximation of L^{-1}k*.

        The first grid size is reused for the first two dimensions; the
        third grid size is used for the last dimension.
        '''
        n_first, n_last = self.grid_num[0], self.grid_num[2]

        self.mean = CubicSpline(
            self.l_bounds, self.u_bounds, orders=[n_first, n_first, n_last]
        )

        if self.mean_only:
            return

        self.var = PCASplines(
            self.l_bounds,
            self.u_bounds,
            orders=[n_first, n_first, n_last],
            svd_rank=self.svd_rank,
        )
コード例 #10
0
ファイル: mapxb.py プロジェクト: aaronchen0316/flare
    def build_map(self, GP):
        """
        Generate grid values from ``GP`` and load them into the splines.

        Args:
            GP: Gaussian process object. It is passed to ``update_bounds``
                and ``GenGrid``; its ``hyps_mask`` is deep-copied onto
                ``self.hyps_mask``.
        """
        self.update_bounds(GP)

        y_mean, y_var = self.GenGrid(GP)
        self.mean.set_values(y_mean)

        if self.var_map == "pca" and self.svd_rank == "auto":
            # "auto" keeps every component. The variance grid is treated as a
            # (number of grid points) x (last axis) matrix, whose rank is at
            # most the smaller of those two sizes. Taking the minimum over
            # every axis of y_var instead would cap the rank at the smallest
            # single grid dimension for multi-dimensional grids.
            # NOTE(review): assumes PCASplines flattens the leading grid axes
            # before its decomposition -- confirm.
            n_grid_points = np.prod(y_var.shape[:-1])
            full_rank = np.min((n_grid_points, y_var.shape[-1]))
            self.var = PCASplines(
                self.bounds[0],
                self.bounds[1],
                orders=self.grid_num,
                svd_rank=full_rank,
            )

        if self.var_map is not None:
            self.var.set_values(y_var)

        # Keep an independent copy so later changes to GP do not leak in.
        self.hyps_mask = deepcopy(GP.hyps_mask)
コード例 #11
0
ファイル: mgp.py プロジェクト: nw13slx/flare-RC
    def build_map(self, y_mean, y_var):
        '''
        Build the spline interpolation for the mean grid ``y_mean`` and,
        unless ``mean_only`` is set, the PCA splines for the variance grid
        ``y_var``.
        '''

        def grid_spec():
            # Fresh arrays on every call so the two containers never share
            # the same bound/order objects.
            return dict(u_bounds=np.array(self.u_bound),
                        l_bounds=np.array(self.l_bound),
                        orders=np.array([self.grid_num]))

        self.mean = SplinesInterpolation(y_mean, **grid_spec())

        if self.mean_only:
            return

        self.var = PCASplines(y_var, svd_rank=self.svd_rank, **grid_spec())
コード例 #12
0
ファイル: mgp_sc.py プロジェクト: nw13slx/flare-RC
    def build_map(self, y_mean, y_var, svd_rank, load_svd):
        '''
        Build the spline interpolation for the mean grid ``y_mean`` and the
        PCA splines (low rank approximation of L^{-1}k*) for ``y_var``.

        The first grid size is reused for the first two dimensions; the
        third grid size is used for the last dimension. ``svd_rank`` and
        ``load_svd`` are forwarded unchanged to ``PCASplines``.
        '''
        n_first, n_last = self.grid_num[0], self.grid_num[2]

        def orders():
            # A new array per container, as each constructor gets its own.
            return np.array([n_first, n_first, n_last])

        self.mean = SplinesInterpolation(
            y_mean, u_bounds=self.u_bound, l_bounds=self.l_bound,
            orders=orders())

        self.var = PCASplines(
            y_var, u_bounds=self.u_bound, l_bounds=self.l_bound,
            orders=orders(), svd_rank=svd_rank, load_svd=load_svd)
コード例 #13
0
ファイル: mgp_sc.py プロジェクト: nw13slx/flare-RC
    def build_map(self, y_mean, y_var):
        '''
        Build the spline interpolation for the mean grid ``y_mean`` and a
        variance container for ``y_var`` chosen by ``self.bodies``:

        * ``'2'``   -> ``SplinesInterpolation`` whose bounds and orders are
          each given twice
        * ``'2+3'`` -> ``PCASplines`` with ``self.svd_rank``

        Any other value of ``self.bodies`` leaves ``self.var`` untouched.
        '''
        self.mean = SplinesInterpolation(
            y_mean,
            u_bounds=np.array(self.u_bound),
            l_bounds=np.array(self.l_bound),
            orders=np.array([self.grid_num]))

        body_kind = self.bodies
        if body_kind == '2':
            upper_pair = np.array([self.u_bound, self.u_bound])
            lower_pair = np.array([self.l_bound, self.l_bound])
            order_pair = np.array([self.grid_num, self.grid_num])
            self.var = SplinesInterpolation(
                y_var,
                u_bounds=upper_pair,
                l_bounds=lower_pair,
                orders=order_pair)
        elif body_kind == '2+3':
            self.var = PCASplines(
                y_var,
                u_bounds=np.array(self.u_bound),
                l_bounds=np.array(self.l_bound),
                orders=np.array([self.grid_num]),
                svd_rank=self.svd_rank,
                load_svd=None)
コード例 #14
0
class Map3body:
    '''
    Mapped 3-body model for a single species triplet.

    Holds a spline container for the GP mean (``self.mean``) and, unless
    ``mean_only`` is set, a PCA spline container for the variance
    (``self.var``), both defined on a 3-dimensional grid.
    '''

    def __init__(self,
                 grid_num,
                 bounds,
                 bond_struc: Structure,
                 svd_rank: int = 0,
                 mean_only: bool = False,
                 load_grid: str = '',
                 update: bool = True,
                 n_cpus=None,
                 n_sample=100):
        '''
        Build 3-body MGP

        grid_num: number of grid points along each of the 3 dimensions
        bounds: pair (lower, upper); each is indexed per dimension
        bond_struc: Mock Structure object which contains 3 atoms to get map
            from
        svd_rank: rank forwarded to the variance spline container
        mean_only: if True, no variance container is built or filled
        load_grid: path prefix of previously saved grid files; when empty
            the grids are generated from the GP
        update: incremental update of stored kernel vectors; not implemented,
            ``GenGrid`` raises NotImplementedError while this is True
        n_cpus: number of worker processes (None -> all available CPUs)
        n_sample: batch size hint forwarded to ``partition_c``
        '''
        self.grid_num = grid_num
        self.bounds = bounds
        self.bond_struc = bond_struc
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.load_grid = load_grid
        self.update = update
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        spc = bond_struc.coded_species
        self.species_code = '_'.join(Z_to_element(spc[i]) for i in range(3))
        self.kv3name = f'kv3_{self.species_code}'

        self.build_map_container()

    def GenGrid(self, GP):
        '''
        To use GP to predict value on each grid point, we need to generate the
        kernel vector kv whose length is the same as the training set size.

        1. We divide the training set into several batches, corresponding to
           different segments of kv
        2. Distribute each batch to a processor, i.e. each processor calculate
           the kv segment of one batch for all grids
        3. Collect kv segments and form a complete kv vector for each grid,
           and calculate the grid value by multiplying the complete kv vector
           with GP.alpha

        The resulting grids are also saved to ``grid3_mean_<species>.npy``
        and ``grid3_var_<species>.npy`` in the current directory.

        Returns:
            (grid_means, grid_vars); ``grid_vars`` is None when
            ``mean_only`` is set.

        Raises:
            NotImplementedError: if ``self.update`` is True.
        '''

        if self.n_cpus is None:
            processes = mp.cpu_count()
        else:
            processes = self.n_cpus

        # ------ get 3body kernel info ------
        kernel_info = get_3bkernel(GP)

        # ------ construct grids ------
        n1, n2, n12 = self.grid_num
        lower, upper = self.bounds[0], self.bounds[1]
        bonds1 = np.linspace(lower[0], upper[0], n1)
        # The second axis uses the dimension-1 bounds, matching what the
        # spline containers (built from the full bounds) and ``write`` use.
        bonds2 = np.linspace(lower[1], upper[1], n2)
        bonds12 = np.linspace(lower[2], upper[2], n12)
        grid_means = np.zeros([n1, n2, n12])

        if not self.mean_only:
            grid_vars = np.zeros([n1, n2, n12, len(GP.alpha)])
        else:
            grid_vars = None

        env12 = AtomicEnvironment(self.bond_struc, 0, GP.cutoffs)
        size = len(GP.training_data)

        # Fail before any worker pool is started: the incremental update
        # path does not exist yet.
        if self.update:
            raise NotImplementedError("the update function is "
                                      "not yet implemented")

        if processes == 1:
            k12_v_all = self._GenGrid_inner(GP.name, 0, size, bonds1,
                                            bonds2, bonds12, env12,
                                            kernel_info)
        else:
            with mp.Pool(processes=processes) as pool:
                block_id, nbatch = partition_c(self.n_sample, size,
                                               processes)

                k12_v_all = np.zeros(
                    [len(bonds1),
                     len(bonds2),
                     len(bonds12), size * 3])

                def collect(jobs, first_block):
                    # jobs[i] was submitted for block_id[first_block + i];
                    # each training point contributes 3 columns.
                    for offset, job in enumerate(jobs):
                        s, e = block_id[first_block + offset]
                        k12_v_all[:, :, :, s * 3:e * 3] = job.get()

                pending = []
                base = 0
                for ibatch in range(nbatch):
                    s, e = block_id[ibatch]
                    pending.append(
                        pool.apply_async(self._GenGrid_inner,
                                         args=(GP.name, s, e, bonds1,
                                               bonds2, bonds12, env12,
                                               kernel_info)))
                    # Collect periodically so that finished results do not
                    # pile up while further batches are being submitted.
                    if len(pending) > processes * 2:
                        collect(pending, base)
                        pending = []
                        base = ibatch + 1
                if pending:
                    collect(pending, base)

                pool.close()
                pool.join()

        for b12 in range(len(bonds12)):
            for b1 in range(len(bonds1)):
                for b2 in range(len(bonds2)):
                    k12_v = k12_v_all[b1, b2, b12, :]
                    grid_means[b1, b2, b12] = np.matmul(k12_v, GP.alpha)
                    if not self.mean_only:
                        grid_vars[b1, b2,
                                  b12, :] = solve_triangular(GP.l_mat,
                                                             k12_v,
                                                             lower=True)

        # ------ save mean and var to file -------
        np.save('grid3_mean_' + self.species_code, grid_means)
        np.save('grid3_var_' + self.species_code, grid_vars)

        return grid_means, grid_vars

    def _GenGrid_inner(self, name, s, e, bonds1, bonds2, bonds12, env12,
                       kernel_info):
        '''
        Calculate kv segments of the given batch of training data for all grids

        s, e: start/end indices of the batch of training data
        Returns an array of shape
        (len(bonds1), len(bonds2), len(bonds12), (e - s) * 3).

        Raises:
            NotImplementedError: if ``self.update`` is True.
        '''

        # The incremental update of stored kernel vectors is not available.
        if self.update:
            raise NotImplementedError("the update function is not yet "
                                      "implemented")

        kernel, en_force_kernel, cutoffs, hyps, hyps_mask = kernel_info
        size = (e - s) * 3
        k12_v = np.zeros([len(bonds1), len(bonds2), len(bonds12), size])
        for b12, r12 in enumerate(bonds12):
            for b1, r1 in enumerate(bonds1):
                for b2, r2 in enumerate(bonds2):

                    # Overwrite the mock environment's 3-body geometry with
                    # the current grid point before evaluating the kernel.
                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
                                                   [r2, 0, 0, 0]])
                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])
                    k12_v[b1, b2,
                          b12, :] = en_kern_vec(name, s, e, env12,
                                                en_force_kernel, hyps, cutoffs,
                                                hyps_mask)

        return k12_v

    def build_map_container(self):
        '''
        build 3-d spline function for mean,
        3-d for the low rank approximation of L^{-1}k*
        '''

        # create spline interpolation class object
        self.mean = CubicSpline(self.bounds[0],
                                self.bounds[1],
                                orders=self.grid_num)

        if not self.mean_only:
            self.var = PCASplines(self.bounds[0],
                                  self.bounds[1],
                                  orders=self.grid_num,
                                  svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        Fill the spline containers, either from freshly generated grids or
        from grid files saved by a previous ``GenGrid`` call.
        '''
        if not self.load_grid:
            # No prefix given (empty string / None): generate from the GP.
            y_mean, y_var = self.GenGrid(GP)
        else:
            # load_grid is used as a plain string prefix of the file names.
            y_mean = np.load(self.load_grid + 'grid3_mean_' +
                             self.species_code + '.npy')
            y_var = None
            if not self.mean_only:
                # Only read the variance grid when it is needed: a grid
                # saved in mean-only mode holds a pickled None, which
                # np.load refuses to read with its default settings.
                y_var = np.load(self.load_grid + 'grid3_var_' +
                                self.species_code + '.npy')

        self.mean.set_values(y_mean)
        if not self.mean_only:
            self.var.set_values(y_var)

    def write(self, f, spc):
        '''
        Write the header line and the mean spline coefficients to ``f``.

        f: writable text file object
        spc: coded species of the triplet; the first 3 entries are converted
            with ``Z_to_element`` for the header

        Coefficients are written five per line.
        '''
        a = self.bounds[0]
        b = self.bounds[1]
        order = self.grid_num

        coefs_3 = self.mean.__coeffs__

        elem1 = Z_to_element(spc[0])
        elem2 = Z_to_element(spc[1])
        elem3 = Z_to_element(spc[2])

        header_3 = '{elem1} {elem2} {elem3} {a1} {a2} {a3} {b1}'\
                   ' {b2} {b3:.10e} {order1} {order2} {order3}\n'\
            .format(elem1=elem1, elem2=elem2, elem3=elem3,
                    a1=a[0], a2=a[1], a3=a[2],
                    b1=b[0], b2=b[1], b3=b[2],
                    order1=order[0], order2=order[1], order3=order[2])
        f.write(header_3)

        n = 0
        for i in range(coefs_3.shape[0]):
            for j in range(coefs_3.shape[1]):
                for k in range(coefs_3.shape[2]):
                    coef = coefs_3[i, j, k]
                    f.write('{:.10e} '.format(coef))
                    # line break after every fifth coefficient
                    if n % 5 == 4:
                        f.write('\n')
                    n += 1

        f.write('\n')
コード例 #15
0
class Map2body:
    '''
    Mapped 2-body model for a single species pair.

    Holds a spline container for the GP mean (``self.mean``) and, unless
    ``mean_only`` is set, a PCA spline container for the variance
    (``self.var``), both defined on a 1-dimensional grid of bond lengths.
    '''

    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='2',
                 svd_rank=0, mean_only=False, n_cpus=1, n_sample=100):
        '''
        Build 2-body MGP

        grid_num: number of grid points
        bounds: pair (lower bounds, upper bounds)
        cutoffs: cutoffs used to build the mock atomic environment
        bond_struc: mock structure; its ``coded_species`` is stored
        bodies: stored as ``self.bodies``
        svd_rank: rank forwarded to the variance spline container
        mean_only: if True, no variance container is built or filled
        n_cpus: number of worker processes (None -> all available CPUs)
        n_sample: batch size hint forwarded to ``partition_c``
        '''

        self.grid_num = grid_num
        self.l_bounds, self.u_bounds = bounds
        self.cutoffs = cutoffs
        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        self.build_map_container()

    def GenGrid(self, GP, processes=1):

        '''
        generate grid data of mean prediction and L^{-1}k* for each bond
        length on the grid, implemented in a parallelized style

        GP: Gaussian process object providing the training data, ``alpha``
            and ``l_mat``
        processes: kept for backward compatibility only; the value is
            ignored and the process count is taken from ``self.n_cpus``
            (None -> all available CPUs)

        Returns:
            (bond_means, bond_vars); ``bond_vars`` is None when
            ``mean_only`` is set.
        '''
        kernel_info = get_2bkernel(GP)

        if (self.n_cpus is None):
            processes = mp.cpu_count()
        else:
            processes = self.n_cpus

        # ------ construct grids ------
        nop = self.grid_num
        bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop)
        bond_means = np.zeros([nop])
        if not self.mean_only:
            bond_vars = np.zeros([nop, len(GP.alpha)])
        else:
            bond_vars = None
        env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

        if processes == 1:
            k12_v_all = self._GenGrid_inner(GP.name, 0, len(GP.training_data),
                                            bond_lengths, env12, kernel_info)
        else:
            with mp.Pool(processes=processes) as pool:
                size = len(GP.training_data)
                block_id, nbatch = partition_c(self.n_sample, size, processes)

                k12_slice = []
                k12_v_all = np.zeros([len(bond_lengths), size*3])
                count = 0
                base = 0
                for ibatch in range(nbatch):
                    s, e = block_id[ibatch]
                    k12_slice.append(
                        pool.apply_async(self._GenGrid_inner,
                                         args=(GP.name, s, e,
                                               bond_lengths,
                                               env12, kernel_info)))
                    count += 1
                    # when there are too many pending jobs, collect some of
                    # the result to reduce memory footprint
                    if (count > processes*2):
                        for ibase in range(count):
                            s, e = block_id[ibase+base]
                            k12_v_all[:, s*3:e*3] = k12_slice[ibase].get()
                        del k12_slice
                        k12_slice = []
                        count = 0
                        base = ibatch+1
                # collect whatever is still pending after the last batch
                if (count > 0):
                    for ibase in range(count):
                        s, e = block_id[ibase+base]
                        k12_v_all[:, s*3:e*3] = k12_slice[ibase].get()
                    del k12_slice
                pool.close()
                pool.join()

        for b, r in enumerate(bond_lengths):
            k12_v = k12_v_all[b, :]
            bond_means[b] = np.matmul(k12_v, GP.alpha)
            if not self.mean_only:
                bond_vars[b, :] = solve_triangular(GP.l_mat, k12_v, lower=True)

        return bond_means, bond_vars

    def _GenGrid_inner(self, name, s, e, bond_lengths,
                       env12, kernel_info):

        '''
        generate the kernel vector segment for the batch of training data
        [s, e) at every bond length; used to parallelize grid generation

        Returns an array of shape (len(bond_lengths), (e - s) * 3).
        '''

        kernel, efk, cutoffs, hyps, hyps_mask = kernel_info
        size = e - s
        k12_v = np.zeros([len(bond_lengths), size*3])
        for b, r in enumerate(bond_lengths):
            # Overwrite the mock environment's bond with the current grid
            # point before evaluating the kernel.
            env12.bond_array_2 = np.array([[r, 1, 0, 0]])
            k12_v[b, :] = get_kernel_vector_unit(
                    name, s, e, env12, 1,
                    kernel, hyps, cutoffs, hyps_mask)
        return k12_v

    def build_map_container(self):
        '''
        create the spline container for the mean and, unless ``mean_only``
        is set, the PCA spline container for the variance
        '''
        self.mean = CubicSpline(self.l_bounds, self.u_bounds,
                                orders=[self.grid_num])

        if not self.mean_only:
            self.var = PCASplines(self.l_bounds, self.u_bounds,
                                  orders=[self.grid_num],
                                  svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        generate the grids from GP and load them into the mean spline and,
        unless ``mean_only`` is set, the variance splines

        Raises AssertionError if ``GP.multihyps`` is not False.
        '''
        assert (GP.multihyps is False), "multihyps is not supported in mgp"
        y_mean, y_var = self.GenGrid(GP)
        self.mean.set_values(y_mean)
        if not self.mean_only:
            self.var.set_values(y_var)

    def write(self, f, spc):
        '''
        Write LAMMPS coefficient file

        f: writable text file object
        spc: coded species of the pair; the first 2 entries are converted
            with ``Z_to_element`` for the header

        Coefficients are written five per line.
        '''
        a = self.l_bounds[0]
        b = self.u_bounds[0]
        order = self.grid_num

        coefs_2 = self.mean.__coeffs__

        elem1 = Z_to_element(spc[0])
        elem2 = Z_to_element(spc[1])
        header_2 = '{elem1} {elem2} {a} {b} {order}\n'\
            .format(elem1=elem1, elem2=elem2, a=a, b=b, order=order)
        f.write(header_2)

        for c, coef in enumerate(coefs_2):
            f.write('{:.10e} '.format(coef))
            # break after every fifth value, except after the very last one
            # (the final newline below closes that line)
            if c % 5 == 4 and c != len(coefs_2)-1:
                f.write('\n')

        f.write('\n')
コード例 #16
0
class Map2body:
    '''
    Mapped 2-body model for a single species pair: a spline container for
    the GP mean and, unless ``mean_only`` is set, a PCA spline container
    for the variance, both on a 1-dimensional grid of bond lengths.
    '''

    def __init__(self,
                 grid_num: int,
                 bounds,
                 bond_struc: Structure,
                 svd_rank=0,
                 mean_only: bool = False,
                 n_cpus: int = None,
                 n_sample: int = 100):
        '''
        Build 2-body MGP

        grid_num: number of grid points
        bounds: pair (lower, upper); each is indexed per dimension
        bond_struc: Mock structure used to sample 2-body forces on 2 atoms
        svd_rank: rank forwarded to the variance spline container
        mean_only: if True, no variance container is built or filled
        n_cpus: number of worker processes (None -> all available CPUs)
        n_sample: batch size hint forwarded to ``partition_c``
        '''

        self.grid_num = grid_num
        self.bounds = bounds
        self.bond_struc = bond_struc
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        coded = bond_struc.coded_species
        self.species_code = Z_to_element(coded[0]) + '_' + Z_to_element(coded[1])

        self.build_map_container()

    def GenGrid(self, GP):
        '''
        Evaluate the GP on every grid point.

        The kernel vector of a grid point has one segment per batch of
        training data. Batches are handed to a process pool, every worker
        computes its segment for all grid points, and the segments are
        assembled into the full kernel vectors. The mean at a grid point is
        the product of its kernel vector with ``GP.alpha``; the variance
        data is the triangular solve of ``GP.l_mat`` against it.

        Both grids are also saved to ``grid2_mean_<species>.npy`` and
        ``grid2_var_<species>.npy`` in the current directory.

        Returns:
            (bond_means, bond_vars); ``bond_vars`` is None when
            ``mean_only`` is set.
        '''

        kernel_info = get_2bkernel(GP)

        processes = mp.cpu_count() if self.n_cpus is None else self.n_cpus

        # ------ construct grids ------
        nop = self.grid_num
        bond_lengths = np.linspace(self.bounds[0][0], self.bounds[1][0], nop)
        bond_means = np.zeros([nop])
        bond_vars = None if self.mean_only else np.zeros([nop, len(GP.alpha)])
        env12 = AtomicEnvironment(self.bond_struc, 0, GP.cutoffs)

        with mp.Pool(processes=processes) as pool:
            size = len(GP.training_data)
            block_id, nbatch = partition_c(self.n_sample, size, processes)

            # each training point contributes 3 columns
            k12_v_all = np.zeros([len(bond_lengths), size * 3])
            pending = []
            base = 0
            for ibatch in range(nbatch):
                s, e = block_id[ibatch]
                pending.append(
                    pool.apply_async(self._GenGrid_inner,
                                     args=(GP.name, s, e, bond_lengths, env12,
                                           kernel_info)))
                # Drain the queue once it grows beyond twice the pool size.
                if len(pending) > processes * 2:
                    for offset, job in enumerate(pending):
                        s, e = block_id[base + offset]
                        k12_v_all[:, s * 3:e * 3] = job.get()
                    pending = []
                    base = ibatch + 1
            # Drain whatever is left after the last submission.
            for offset, job in enumerate(pending):
                s, e = block_id[base + offset]
                k12_v_all[:, s * 3:e * 3] = job.get()
            pool.close()
            pool.join()

        for b in range(len(bond_lengths)):
            kv = k12_v_all[b, :]
            bond_means[b] = np.matmul(kv, GP.alpha)
            if not self.mean_only:
                bond_vars[b, :] = solve_triangular(GP.l_mat, kv, lower=True)

        # File name suffix: one "_<element>" per atom of the mock structure.
        write_species_name = ''.join(
            "_" + Z_to_element(x) for x in self.bond_struc.coded_species)
        # ------ save mean and var to file -------
        np.save('grid2_mean' + write_species_name, bond_means)
        np.save('grid2_var' + write_species_name, bond_vars)

        return bond_means, bond_vars

    def _GenGrid_inner(self, name, s, e, bond_lengths, env12, kernel_info):
        '''
        Compute the kernel vector segment of the training data batch
        [s, e) for every grid point.

        Returns an array of shape (len(bond_lengths), (e - s) * 3).
        '''

        kernel, en_force_kernel, cutoffs, hyps, hyps_mask = kernel_info
        n_columns = (e - s) * 3
        segment = np.zeros([len(bond_lengths), n_columns])
        for row, r in enumerate(bond_lengths):
            # Overwrite the mock environment's bond with this grid point.
            env12.bond_array_2 = np.array([[r, 1, 0, 0]])
            segment[row, :] = en_kern_vec(name, s, e, env12, en_force_kernel,
                                          hyps, cutoffs, hyps_mask)
        return segment

    def build_map_container(self):
        '''
        Create the spline container for the mean and, unless ``mean_only``
        is set, the PCA spline container for the variance.
        '''
        lower, upper = self.bounds[0], self.bounds[1]
        self.mean = CubicSpline(lower, upper, orders=[self.grid_num])

        if self.mean_only:
            return

        self.var = PCASplines(lower, upper,
                              orders=[self.grid_num],
                              svd_rank=self.svd_rank)

    def build_map(self, GP):
        '''
        Generate the grids from GP and load them into the mean spline and,
        unless ``mean_only`` is set, the variance splines.
        '''
        grid_mean, grid_var = self.GenGrid(GP)
        self.mean.set_values(grid_mean)
        if not self.mean_only:
            self.var.set_values(grid_var)

    def write(self, f, spc):
        '''
        Write LAMMPS coefficient file

        f: writable text file object
        spc: coded species of the pair; the first 2 entries are converted
            with ``Z_to_element`` for the header

        Coefficients are written five per line.
        '''
        a = self.bounds[0][0]
        b = self.bounds[1][0]
        order = self.grid_num

        coefs_2 = self.mean.__coeffs__

        elem1 = Z_to_element(spc[0])
        elem2 = Z_to_element(spc[1])
        f.write(f'{elem1} {elem2} {a} {b} {order}\n')

        for c, coef in enumerate(coefs_2):
            f.write('{:.10e} '.format(coef))
            # break after every fifth value, except after the very last one
            # (the final newline below closes that line)
            ends_row = c % 5 == 4
            if ends_row and c != len(coefs_2) - 1:
                f.write('\n')

        f.write('\n')
コード例 #17
0
ファイル: mgp.py プロジェクト: smheidrich/flare
class Map3body:

    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='3',
            svd_rank=0, mean_only=False, load_grid=None, update=True):
        '''
        Build 3-body MGP

        Stores the configuration and creates the empty spline containers.

        Args:
            grid_num: grid sizes; entries 0 and 2 are used as the number of
                bond-length and angle points.
            bounds: pair (lower bounds, upper bounds).
            cutoffs: cutoffs used for the probe AtomicEnvironment.
            bond_struc: structure the probe environment is built from; its
                coded_species is kept as self.species.
            bodies: label of the interaction order.
            svd_rank: rank handed to the PCASplines variance container.
            mean_only: when True no variance container is built or filled.
            load_grid: directory holding saved grid files, or None to
                generate the grid.
            update: forwarded to the grid workers to enable the kv3
                kernel-vector cache.
        '''

        lower, upper = bounds
        self.l_bounds = lower
        self.u_bounds = upper

        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species

        self.grid_num = grid_num
        self.cutoffs = cutoffs
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.load_grid = load_grid
        self.update = update

        self.build_map_container()


    def GenGrid(self, GP, processes=mp.cpu_count()):

        '''
        generate grid data of mean prediction and L^{-1}k* for each triplet
         implemented in a parallelized style

        The GP is temporarily switched to the 3-body kernel with its last
        three hyperparameters; the original kernel and hyperparameters are
        restored even if grid generation fails. One task per cos-angle grid
        point is dispatched to a process pool.

        Args:
            GP: Gaussian process object; its kernel and hyps are swapped
                during the call and restored afterwards.
            processes: number of worker processes.

        Returns:
            (bond_means, bond_vars) with shapes [nop, nop, noa] and
            [nop, nop, noa, len(GP.alpha)]. Both arrays are also saved to
            'grid3_mean.npy' and 'grid3_var.npy' in the working directory.
        '''
        # local import: only needed to reset the cache directory
        import shutil

        # ------ change GP kernel to 3 body ------
        original_kernel = GP.kernel
        original_hyps = np.copy(GP.hyps)
        GP.kernel = three_body_mc
        GP.hyps = GP.hyps[-3:]

        try:
            # ------ construct grids ------
            nop = self.grid_num[0]
            noa = self.grid_num[2]
            bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0],
                                       nop)
            cos_angles = np.linspace(self.l_bounds[2], self.u_bounds[2], noa)
            bond_means = np.zeros([nop, nop, noa])
            bond_vars = np.zeros([nop, nop, noa, len(GP.alpha)])
            env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

            pool_list = [(i, cos_angles[i], bond_lengths, GP, env12,
                          self.update) for i in range(noa)]

            if self.update:
                # start from an empty kernel-vector cache directory without
                # shelling out to rm/mkdir
                if 'kv3' in os.listdir():
                    shutil.rmtree('kv3')
                os.mkdir('kv3')

            pool = mp.Pool(processes=processes)
            try:
                A_list = pool.map(self._GenGrid_inner, pool_list)
            finally:
                # never leave worker processes behind, even on failure
                pool.close()
                pool.join()

            for a12 in range(noa):
                bond_means[:, :, a12] = A_list[a12][0]
                bond_vars[:, :, a12, :] = A_list[a12][1]
        finally:
            # ------ change back to original GP ------
            GP.hyps = original_hyps
            GP.kernel = original_kernel

        # ------ save mean and var to file -------
        np.save('grid3_mean', bond_means)
        np.save('grid3_var', bond_vars)

        return bond_means, bond_vars

    def _GenGrid_inner(self, params):

        '''
        generate grid for each angle, used to parallelize grid generation
        '''
        a12, cos_angle12, bond_lengths, GP, env12, update = params
        nop = self.grid_num[0]
        bond_means = np.zeros([nop, nop])
        bond_vars = np.zeros([nop, nop, len(GP.alpha)])

        # open saved k vector file, and write to new file
        if update:
            kv_filename = 'kv3/'+str(a12)
            size = len(GP.training_data) * 3
            new_kv_file = np.zeros((nop**2+1, size))
            new_kv_file[0,0] = size
            if str(a12)+'.npy' in os.listdir('kv3'):
                old_kv_file = np.load(kv_filename+'.npy') 
                last_size = int(old_kv_file[0,0])
                new_kv_file[:, :last_size] = old_kv_file
            else:
                last_size = 0
            ds = [1, 2, 3]

        for b1, r1 in enumerate(bond_lengths):
            r1 = bond_lengths[b1]
            for b2, r2 in enumerate(bond_lengths):
                x2 = r2 * cos_angle12
                y2 = np.sqrt(r2**2 - x2**2)
                r12 = np.linalg.norm(np.array([x2-r1, y2, 0]))

                env12.bond_array_3 = np.array([[r1, 1, 0, 0], [r2, 0, 0, 0]])
                env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])

                # calculate kernel functions of those newly added training data
                if update:
                    k12_v = new_kv_file[1+b1*nop+b2, :]
                    for m_index in range(last_size, size):
                        x_2 = GP.training_data[int(math.floor(m_index / 3))]
                        d_2 = ds[m_index % 3]
                        k12_v[m_index] = GP.kernel(env12, x_2, 1, d_2,
                                               GP.hyps, GP.cutoffs)
                else:
                    k12_v = GP.get_kernel_vector(env12, 1)   

                if update:
                    new_kv_file[1+b1*nop+b2, :] = k12_v

                # calculate mean and var value for the mapping
                mean_diff = np.matmul(k12_v, GP.alpha)
                bond_means[b1, b2] = mean_diff

                if not self.mean_only:
                    v12_vec = solve_triangular(GP.l_mat, k12_v, lower=True)
                    bond_vars[b1, b2, :] = v12_vec

        # replace the old file with the new file
        if update:
            np.save(kv_filename, new_kv_file)

        return bond_means, bond_vars

    def build_map_container(self):
        '''
        Create the empty spline containers of this map.

        Both containers span l_bounds..u_bounds with orders
        [nop, nop, noa], where nop and noa are entries 0 and 2 of grid_num.
        The variance container is skipped when mean_only is set.
        '''
        nop, noa = self.grid_num[0], self.grid_num[2]

        self.mean = CubicSpline(self.l_bounds, self.u_bounds,
                                orders=[nop, nop, noa])

        if self.mean_only:
            return

        self.var = PCASplines(self.l_bounds, self.u_bounds,
                              orders=[nop, nop, noa],
                              svd_rank=self.svd_rank)

    def build_map(self, GP):

        '''
        build 3-d spline function for mean,
        3-d for the low rank approximation of L^{-1}k*

        The grid values are read from '<load_grid>/grid3_mean.npy' and
        '<load_grid>/grid3_var.npy' when load_grid is set, otherwise they
        are generated from GP. The variance container is only filled when
        mean_only is not set.
        '''

        if self.load_grid:
            # reuse a previously saved grid
            grid_mean = np.load(self.load_grid+'/grid3_mean.npy')
            grid_var = np.load(self.load_grid+'/grid3_var.npy')
        else:
            grid_mean, grid_var = self.GenGrid(GP)

        self.mean.set_values(grid_mean)

        if self.mean_only:
            return
        self.var.set_values(grid_var)
コード例 #18
0
class Map3body:

    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='3',
            svd_rank=0, mean_only=False, load_grid=None, update=True,
            n_cpus=1, n_sample=100):
        '''
        Build 3-body MGP

        Stores the configuration and creates the empty spline containers.

        Args:
            grid_num: grid sizes; entries 0 and 2 are used as the number of
                bond-length and angle points.
            bounds: pair (lower bounds, upper bounds).
            cutoffs: cutoffs used for the probe AtomicEnvironment.
            bond_struc: structure the probe environment is built from; its
                coded_species is kept as self.species.
            bodies: label of the interaction order.
            svd_rank: rank handed to the PCASplines variance container.
            mean_only: when True no variance container is built or filled.
            load_grid: directory holding saved grid files, or None to
                generate the grid.
            update: when True GenGrid resets the 'kv3' directory.
            n_cpus: number of worker processes; None means all CPUs and 1
                selects the serial code path.
            n_sample: batch-size argument passed to partition_c.
        '''

        lower, upper = bounds
        self.l_bounds = lower
        self.u_bounds = upper

        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species

        self.grid_num = grid_num
        self.cutoffs = cutoffs
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only
        self.load_grid = load_grid
        self.update = update
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        self.build_map_container()


    def GenGrid(self, GP):
        '''
        generate grid data of mean prediction and L^{-1}k* for each triplet
         implemented in a parallelized style

        The training set is split into batches by partition_c; each batch is
        sent to a worker that computes the matching slice of the kernel
        vector for every grid point. Results are collected in rounds of at
        most 2 * processes + 1 outstanding batches. With one process the
        work is delegated to GenGrid_serial.

        Args:
            GP: Gaussian process object providing training_data, alpha,
                l_mat and name.

        Returns:
            (bond_means, bond_vars); bond_vars is None when mean_only is
            set. Both are also saved to 'grid3_mean.npy' and
            'grid3_var.npy' in the working directory.
        '''
        # local import: only needed to reset the kv3 directory
        import shutil

        if (self.n_cpus is None):
            processes = mp.cpu_count()
        else:
            processes = self.n_cpus

        if processes == 1:
            return self.GenGrid_serial(GP)

        # ------ get 3body kernel info ------
        kernel_info = get_3bkernel(GP)

        # ------ construct grids ------
        nop = self.grid_num[0]
        noa = self.grid_num[2]
        bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop)
        cos_angles = np.linspace(self.l_bounds[2], self.u_bounds[2], noa)

        bond_means = np.zeros([nop, nop, noa])
        if not self.mean_only:
            bond_vars = np.zeros([nop, nop, noa, len(GP.alpha)])
        else:
            bond_vars = None
        env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

        if self.update:
            # os.rmdir refuses non-empty directories; remove the whole tree
            if 'kv3' in os.listdir():
                shutil.rmtree('kv3')
            os.mkdir('kv3')

        size = len(GP.training_data)
        block_id, nbatch = partition_c(self.n_sample, size, processes)
        # progress output only for large training sets
        verbose = size > 5000

        k12_v_all = np.zeros([len(bond_lengths), len(bond_lengths),
                              len(cos_angles), size*3])

        with mp.Pool(processes=processes) as pool:

            if verbose:
                print('parallel set up:', size, nbatch, self.n_sample,
                      time.time())

            k12_slice = []
            count = 0  # batches submitted since the last collection
            base = 0   # batch index of the first pending result
            for ibatch in range(nbatch):
                s, e = block_id[ibatch]
                k12_slice.append(
                    pool.apply_async(self._GenGrid_inner_most,
                                     args=(GP.name, s, e,
                                           cos_angles, bond_lengths,
                                           env12, kernel_info)))
                if verbose:
                    print('send', ibatch, nbatch, s, e, time.time())
                count += 1

                # collect a round of results to bound the pending memory
                if (count > processes*2):
                    for ibase in range(count):
                        s, e = block_id[ibase+base]
                        k12_v_all[:, :, :, s*3:e*3] = k12_slice[ibase].get()
                        if verbose:
                            print('get', ibase+base)
                    k12_slice = []
                    count = 0
                    base = ibatch+1

            # collect whatever is left from the last, partial round
            for ibase in range(count):
                s, e = block_id[ibase+base]
                k12_v_all[:, :, :, s*3:e*3] = k12_slice[ibase].get()
            del k12_slice

            pool.close()
            pool.join()

        for a12, cos_angle in enumerate(cos_angles):
            for b1, r1 in enumerate(bond_lengths):
                for b2, r2 in enumerate(bond_lengths):
                    k12_v = k12_v_all[b1, b2, a12, :]
                    bond_means[b1, b2, a12] = np.matmul(k12_v, GP.alpha)
                    if not self.mean_only:
                        bond_vars[b1, b2, a12, :] = solve_triangular(
                            GP.l_mat, k12_v, lower=True)

        # ------ save mean and var to file -------
        np.save('grid3_mean', bond_means)
        np.save('grid3_var', bond_vars)

        return bond_means, bond_vars

    def GenGrid_serial(self, GP):
        '''
        generate grid data of mean prediction and L^{-1}k* for each triplet
         in a single process (serial counterpart of GenGrid)

        Args:
            GP: Gaussian process object providing training_data, alpha and
                l_mat.

        Returns:
            (bond_means, bond_vars) with shapes [nop, nop, noa] and
            [nop, nop, noa, len(GP.alpha)]; bond_vars stays zero when
            mean_only is set. Both are also saved to 'grid3_mean.npy' and
            'grid3_var.npy' in the working directory.
        '''
        # local import: only needed to reset the kv3 directory
        import shutil

        # ------ get 3body kernel info ------
        kernel, efk, cutoffs, hyps, hyps_mask = get_3bkernel(GP)

        # ------ construct grids ------
        nop = self.grid_num[0]
        noa = self.grid_num[2]
        bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop)
        cos_angles = np.linspace(self.l_bounds[2], self.u_bounds[2], noa)
        bond_means = np.zeros([nop, nop, noa])
        bond_vars = np.zeros([nop, nop, noa, len(GP.alpha)])
        env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

        if self.update:
            # os.rmdir refuses non-empty directories; remove the whole tree
            if 'kv3' in os.listdir():
                shutil.rmtree('kv3')
            os.mkdir('kv3')

        size = len(GP.training_data)
        ds = [1, 2, 3]
        k_v = np.zeros(3)
        k12_v_all = np.zeros([len(bond_lengths), len(bond_lengths),
                              len(cos_angles), size*3])
        for b1, r1 in enumerate(bond_lengths):
            for b2, r2 in enumerate(bond_lengths):
                for a12, cos_angle12 in enumerate(cos_angles):

                    # place the second bond at the given angle to the first
                    x2 = r2 * cos_angle12
                    y2 = r2 * np.sqrt(1-cos_angle12**2)
                    r12 = np.linalg.norm(np.array([x2-r1, y2, 0]))

                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
                                                   [r2, 0, 0, 0]])
                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])

                    # three kernel entries (d = 1, 2, 3) per training entry
                    for isample, sample in enumerate(GP.training_data):
                        for d in ds:
                            k_v[d-1] = kernel(env12, sample, 1, d,
                                              hyps, cutoffs)

                        k12_v_all[b1, b2, a12, isample*3:isample*3+3] = k_v

        for b1, r1 in enumerate(bond_lengths):
            for b2, r2 in enumerate(bond_lengths):
                for a12, cos_angle in enumerate(cos_angles):
                    k12_v = k12_v_all[b1, b2, a12, :]
                    bond_means[b1, b2, a12] = np.matmul(k12_v, GP.alpha)
                    if not self.mean_only:
                        bond_vars[b1, b2, a12, :] = solve_triangular(
                            GP.l_mat, k12_v, lower=True)

        # ------ save mean and var to file -------
        np.save('grid3_mean', bond_means)
        np.save('grid3_var', bond_vars)

        return bond_means, bond_vars

    def _GenGrid_inner_most(self, name, s, e, cos_angles, bond_lengths, env12, kernel_info):

        '''
        generate grid for each cos_angle, used to parallelize grid generation
        '''

        kernel, efk, cutoffs, hyps, hyps_mask = kernel_info
        training_data = gp_algebra._global_training_data[name]
        # open saved k vector file, and write to new file
        size = (e-s)*3
        k12_v = np.zeros([len(bond_lengths), len(bond_lengths),
                          len(cos_angles), size])
        for a12, cos_angle12 in enumerate(cos_angles):
            for b1, r1 in enumerate(bond_lengths):
                for b2, r2 in enumerate(bond_lengths):

                    x2 = r2 * cos_angle12
                    y2 = r2 * np.sqrt(1-cos_angle12**2)
                    r12 = np.linalg.norm(np.array([x2-r1, y2, 0]))

                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
                                                   [r2, 0, 0, 0]])
                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])

                    k12_v[b1, b2, a12, :] = \
                            get_kernel_vector_unit(name, s, e, env12, 1,
                                    kernel, hyps, cutoffs, hyps_mask)

        return k12_v

    def build_map_container(self):
        '''
        Create the empty spline containers of this map.

        Both containers span l_bounds..u_bounds with orders grid_num; the
        variance container is skipped when mean_only is set.
        '''
        lower, upper = self.l_bounds, self.u_bounds
        self.mean = CubicSpline(lower, upper, orders=self.grid_num)

        if self.mean_only:
            return

        self.var = PCASplines(lower, upper,
                              orders=self.grid_num,
                              svd_rank=self.svd_rank)

    def build_map(self, GP):

        '''
        build 3-d spline function for mean,
        3-d for the low rank approximation of L^{-1}k*

        GP.multihyps must be False. The grid values are read from
        '<load_grid>/grid3_mean.npy' and '<load_grid>/grid3_var.npy' when
        load_grid is set, otherwise they are generated from GP. The
        variance container is only filled when mean_only is not set.
        '''

        assert (GP.multihyps is False), "multihyps is not supported in mgp"

        if self.load_grid:
            # reuse a previously saved grid
            grid_mean = np.load(self.load_grid+'/grid3_mean.npy')
            grid_var = np.load(self.load_grid+'/grid3_var.npy')
        else:
            grid_mean, grid_var = self.GenGrid(GP)

        self.mean.set_values(grid_mean)

        if self.mean_only:
            return
        self.var.set_values(grid_var)

    def write(self, f, spc):
        '''
        Write the 3-body mean-spline coefficients to a text stream.

        Emits one header line with the three element symbols, the lower
        bounds, the upper bounds (the third in scientific notation) and the
        three grid orders, followed by the coefficients, five per line.

        Args:
            f: writable text stream.
            spc: triple of atomic numbers, converted with Z_to_element.
        '''
        lower = self.l_bounds
        upper = self.u_bounds
        order = self.grid_num
        coefs = self.mean.__coeffs__

        symbols = (Z_to_element(spc[0]),
                   Z_to_element(spc[1]),
                   Z_to_element(spc[2]))

        f.write('{} {} {} {} {} {} {} {} {:.10e} {} {} {}\n'.format(
            symbols[0], symbols[1], symbols[2],
            lower[0], lower[1], lower[2],
            upper[0], upper[1], upper[2],
            order[0], order[1], order[2]))

        # walk the first three axes in row-major order
        for n, index in enumerate(np.ndindex(*coefs.shape[:3])):
            f.write('{:.10e} '.format(coefs[index]))
            if n % 5 == 4:
                f.write('\n')

        f.write('\n')
コード例 #19
0
ファイル: mgp.py プロジェクト: smheidrich/flare
class Map2body:
    def __init__(self, grid_num, bounds, cutoffs, bond_struc, bodies='2',
                 svd_rank=0, mean_only=False):
        '''
        Build 2-body MGP

        Stores the configuration and creates the empty spline containers.

        Args:
            grid_num: number of bond-length grid points.
            bounds: pair (lower bounds, upper bounds).
            cutoffs: cutoffs used for the probe AtomicEnvironment.
            bond_struc: structure the probe environment is built from; its
                coded_species is kept as self.species.
            bodies: label of the interaction order.
            svd_rank: rank handed to the PCASplines variance container.
            mean_only: when True no variance container is built or filled.
        '''

        lower, upper = bounds
        self.l_bounds = lower
        self.u_bounds = upper

        self.bond_struc = bond_struc
        self.species = bond_struc.coded_species

        self.grid_num = grid_num
        self.cutoffs = cutoffs
        self.bodies = bodies
        self.svd_rank = svd_rank
        self.mean_only = mean_only

        self.build_map_container()

    def GenGrid(self, GP, processes=mp.cpu_count()):

        '''
        generate grid data of mean prediction and L^{-1}k* for each bond
         length, implemented in a parallelized style

        The GP is temporarily switched to the 2-body kernel with its first
        cutoff and hyperparameters 0, 1 and -1; the original kernel,
        cutoffs and hyperparameters are restored even if grid generation
        fails. One task per grid point is dispatched to a process pool.

        Args:
            GP: Gaussian process object; its kernel, cutoffs and hyps are
                swapped during the call and restored afterwards.
            processes: number of worker processes.

        Returns:
            (bond_means, bond_vars) with shapes [nop] and
            [nop, len(GP.alpha)].
        '''

        # ------ change GP kernel to 2 body ------
        original_kernel = GP.kernel
        original_cutoffs = np.copy(GP.cutoffs)
        original_hyps = np.copy(GP.hyps)

        try:
            GP.kernel = two_body_mc
            GP.cutoffs = [GP.cutoffs[0]]
            GP.hyps = [GP.hyps[0], GP.hyps[1], GP.hyps[-1]]

            # ------ construct grids ------
            nop = self.grid_num
            bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0],
                                       nop)
            bond_means = np.zeros([nop])
            bond_vars = np.zeros([nop, len(GP.alpha)])
            env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs)

            pool_list = [(i, bond_lengths, GP, env12)
                         for i in range(nop)]

            pool = mp.Pool(processes=processes)
            try:
                A_list = pool.map(self._GenGrid_inner, pool_list)
            finally:
                # never leave worker processes behind, even on failure
                pool.close()
                pool.join()

            for p in range(nop):
                bond_means[p] = A_list[p][0]
                bond_vars[p, :] = A_list[p][1]
        finally:
            # ------ change back original GP ------
            GP.cutoffs = original_cutoffs
            GP.hyps = original_hyps
            GP.kernel = original_kernel

        return bond_means, bond_vars

    def _GenGrid_inner(self, params):

        '''
        generate grid for each angle, used to parallelize grid generation
        '''
        b, bond_lengths, GP, env12 = params
        # nop = self.grid_num
        r = bond_lengths[b]
        env12.bond_array_2 = np.array([[r, 1, 0, 0]])

        k12_v = GP.get_kernel_vector(env12, 1)
        mean_diff = np.matmul(k12_v, GP.alpha)
        bond_means = mean_diff
        bond_vars = np.zeros(k12_v.shape)

        if not self.mean_only:
            v12_vec = solve_triangular(GP.l_mat, k12_v, lower=True)
            bond_vars = v12_vec

        return bond_means, bond_vars

    def build_map_container(self):
        '''
        Create the empty spline containers of this map.

        Both containers span l_bounds..u_bounds with the single order
        grid_num; the variance container is skipped when mean_only is set.
        '''
        lower, upper = self.l_bounds, self.u_bounds
        self.mean = CubicSpline(lower, upper, orders=[self.grid_num])

        if self.mean_only:
            return

        self.var = PCASplines(lower, upper,
                              orders=[self.grid_num],
                              svd_rank=self.svd_rank)
        
    def build_map(self, GP):
        '''
        build 1-d spline function for mean, 2-d for var

        The grid values are generated from GP; the variance container is
        only filled when mean_only is not set.
        '''
        grid_mean, grid_var = self.GenGrid(GP)
        self.mean.set_values(grid_mean)

        if self.mean_only:
            return
        self.var.set_values(grid_var)
コード例 #20
0
ファイル: mapxb.py プロジェクト: aaronchen0316/flare
class SingleMapXbody:
    def __init__(
        self,
        grid_num: int = 1,
        bounds="auto",
        species: list = [],
        svd_rank=0,
        var_map: str = None,
        load_grid=None,
        lower_bound_relax=0.1,
        n_cpus: int = None,
        n_sample: int = 100,
        **kwargs,
    ):

        self.grid_num = grid_num
        self.bounds = deepcopy(bounds)
        self.species = species
        self.svd_rank = svd_rank
        self.var_map = var_map
        self.load_grid = load_grid
        self.lower_bound_relax = lower_bound_relax
        self.n_cpus = n_cpus
        self.n_sample = n_sample

        self.auto_lower = bounds[0] == "auto"
        if self.auto_lower:
            lower_bound = None
        else:
            lower_bound = bounds[0]

        self.auto_upper = bounds[1] == "auto"
        if self.auto_upper:
            upper_bound = None
        else:
            upper_bound = bounds[1]

        self.set_bounds(lower_bound, upper_bound)

        self.hyps_mask = None

        if not self.auto_lower and not self.auto_upper:
            self.build_map_container()

    def set_bounds(self, lower_bound, upper_bound):
        """Abstract hook: child classes must override this method."""
        message = "need to be implemented in child class"
        raise NotImplementedError(message)

    def construct_grids(self):
        """Abstract hook: child classes must override this method."""
        message = "need to be implemented in child class"
        raise NotImplementedError(message)

    def LoadGrid(self):
        """
        Load previously saved grid values from '<load_grid>/mgp_grids'.

        Returns:
            (grid_mean, grid_vars) read from the '<bodies>_<species_code>'
            mean and var files.

        Raises:
            FileNotFoundError: if load_grid has no 'mgp_grids' entry.
        """
        has_grid_folder = "mgp_grids" in os.listdir(self.load_grid)
        if not has_grid_folder:
            raise FileNotFoundError(
                "Please set 'load_grid' as the location of mgp_grids folder")

        grid_path = f"{self.load_grid}/mgp_grids/{self.bodies}_{self.species_code}"
        mean_values = np.load(f"{grid_path}_mean.npy")
        # NOTE(review): allow_pickle=True unpickles file contents; only load
        # grids from trusted locations.
        var_values = np.load(f"{grid_path}_var.npy", allow_pickle=True)
        return mean_values, var_values

    def GenGrid(self, GP):
        """
        To use GP to predict value on each grid point, we need to generate the
        kernel vector kv whose length is the same as the training set size.

        1. We divide the training set into several batches, corresponding to
           different segments of kv
        2. Distribute each batch to a processor, i.e. each processor calculate
           the kv segment of one batch for all grids
        3. Collect kv segments and form a complete kv vector for each grid,
           and calculate the grid value by multiplying the complete kv vector
           with GP.alpha

        If load_grid is set the saved grid is returned instead. Without any
        training data a warning is issued and a flat zero mean of length
        prod(grid_num) is returned together with None.

        Returns:
            (grid_mean, grid_vars): grid_mean reshaped to grid_num;
            grid_vars is None when var_map is None, otherwise an array of
            shape (*grid_num, k). Both are also saved under 'mgp_grids/'.
        """

        if self.load_grid is not None:
            return self.LoadGrid()

        if self.n_cpus is None:
            processes = mp.cpu_count()
        else:
            processes = self.n_cpus

        # -------- get training data info ----------
        n_envs = len(GP.training_data)
        n_strucs = len(GP.training_structures)

        # computed before the early return below, which needs it
        n_grid = np.prod(self.grid_num)

        if (n_envs == 0) and (n_strucs == 0):
            warnings.warn("No training data, will return 0")
            return np.zeros([n_grid]), None

        # ------- call gengrid functions ---------------
        kernel_info = get_kernel_term(self.kernel_name, GP.hyps_mask, GP.hyps)
        args = [GP.name, kernel_info]

        k12_v_force = self._gengrid_par(args, True, n_envs, processes)
        k12_v_energy = self._gengrid_par(args, False, n_strucs, processes)

        k12_v_all = np.hstack([k12_v_force, k12_v_energy])
        # release the two halves before the next large allocations
        del k12_v_force
        del k12_v_energy

        # ------- compute bond means and variances ---------------
        grid_mean = k12_v_all @ GP.alpha
        grid_mean = np.reshape(grid_mean, self.grid_num)

        grid_vars = None
        if self.var_map is not None:
            grid_vars = solve_triangular(GP.l_mat, k12_v_all.T, lower=True).T

            if self.var_map == "simple":
                self_kern = self._gengrid_var_simple(kernel_info)
                grid_vars = np.sqrt(self_kern - np.sum(grid_vars**2, axis=1))
                grid_vars = np.expand_dims(grid_vars, axis=1)

            tensor_shape = np.array([*self.grid_num, grid_vars.shape[1]])
            grid_vars = np.reshape(grid_vars, tensor_shape)

        # ------ save mean and var to file -------
        if "mgp_grids" not in os.listdir("./"):
            os.mkdir("mgp_grids")

        grid_path = f"mgp_grids/{self.bodies}_{self.species_code}"
        np.save(f"{grid_path}_mean", grid_mean)
        np.save(f"{grid_path}_var", grid_vars)

        return grid_mean, grid_vars

    def _gengrid_par(self, args, force_block, n_envs, processes):

        if n_envs == 0:
            n_grid = np.prod(self.grid_num)
            return np.empty((n_grid, 0))

        gengrid_func = self._gengrid_inner

        if processes == 1:
            return gengrid_func(*args, force_block, 0, n_envs)

        with mp.Pool(processes=processes) as pool:

            block_id, nbatch = partition_vector(self.n_sample, n_envs,
                                                processes)

            k12_slice = []
            for ibatch in range(nbatch):
                s, e = block_id[ibatch]
                k12_slice.append(
                    pool.apply_async(gengrid_func,
                                     args=args + [force_block, s, e]))
            k12_matrix = []
            for ibatch in range(nbatch):
                k12_matrix += [k12_slice[ibatch].get()]
            pool.close()
            pool.join()
        del k12_slice
        k12_v_force = np.hstack(k12_matrix)
        del k12_matrix

        return k12_v_force

    def _gengrid_inner(self, name, kernel_info, force_block, s, e):
        """
        Loop over different parts of the training set. from element s to element e

        Args:
            name: name of the gp instance
            s: start index of the training data parition
            e: end index of the training data parition
            kernel_info: return value of the get_3b_kernel
        """

        _, cutoffs, hyps, hyps_mask = kernel_info

        r_cut = cutoffs[self.kernel_name]

        n_grids = np.prod(self.grid_num)

        if np.any(np.array(self.bounds[1]) <= 0.0):
            if force_block:
                return np.zeros((n_grids, (e - s) * 3))
            else:
                return np.zeros((n_grids, e - s))

        grids = self.construct_grids()
        coords = np.zeros((grids.shape[0], self.grid_dim * 3),
                          dtype=np.float64)  # padding 0
        coords[:, 0] = np.ones_like(coords[:, 0])

        fj, fdj = self.grid_cutoff(grids,
                                   r_cut,
                                   coords,
                                   derivative=True,
                                   cutoff_func=cf.quadratic_cutoff)
        fdj = fdj[:, [0]]

        if force_block:
            training_data = _global_training_data[name]
            kern_type = f"energy_force"
        else:
            training_data = _global_training_structures[name]
            kern_type = f"energy_energy"

        k_v = []
        chunk_size = 32**3
        if n_grids > chunk_size:
            n_chunk = ceil(n_grids / chunk_size)
        else:
            n_chunk = 1

        for m_index in range(s, e):
            data = training_data[m_index]
            kern_vec = []
            for g in range(n_chunk):
                gs = chunk_size * g
                ge = np.min((chunk_size * (g + 1), n_grids))
                grid_chunk = grids[gs:ge, :]
                fj_chunk = fj[gs:ge, :]
                fdj_chunk = fdj[gs:ge, :]
                kv_chunk = self.get_grid_kernel(
                    kern_type,
                    data,
                    kernel_info,
                    grid_chunk,
                    fj_chunk,
                    fdj_chunk,
                )
                kern_vec.append(kv_chunk)
            kern_vec = np.hstack(kern_vec)
            k_v.append(kern_vec)

        if len(k_v) > 0:
            k_v = np.vstack(k_v).T
        else:
            k_v = np.zeros((n_grids, 0))

        return k_v

    def _gengrid_var_simple(self, kernel_info):
        """
        Generate grids for variance upper bound, based on the inequality:
        V(c, p)^2 <= V(c, c) V(p, p)
        where c, p are two bonds/triplets or environments

        Args:
            kernel_info: 4-tuple whose second entry maps kernel names to
                cutoffs; the tuple is also forwarded to get_self_kernel.

        Returns:
            The result of get_self_kernel evaluated on the grid.
        """

        _, cutoffs, _, _ = kernel_info
        r_cut = cutoffs[self.kernel_name]

        grids = self.construct_grids()

        n_points = grids.shape[0]
        coords = np.zeros((n_points, self.grid_dim * 3),
                          dtype=np.float64)  # padding 0
        coords[:, 0] = np.ones_like(coords[:, 0])

        fj, fdj = self.grid_cutoff(grids,
                                   r_cut,
                                   coords,
                                   derivative=True,
                                   cutoff_func=cf.quadratic_cutoff)
        first_derivative = fdj[:, [0]]

        return self.get_self_kernel(kernel_info, grids, fj, first_derivative)

    def build_map_container(self):
        """
        Create the spline containers for the mean and, depending on
        var_map, for the variance.

        If any upper bound is non-positive, placeholder bounds of zeros and
        ones are used. With var_map == "pca" the variance container is only
        built for an integer svd_rank; svd_rank == "auto" issues a warning
        instead. With var_map == "simple" a CubicSpline is used.
        """
        has_valid_upper = not np.any(np.array(self.bounds[1]) <= 0.0)
        if has_valid_upper:
            bounds = self.bounds
        else:
            bounds = [
                np.zeros_like(self.bounds[0]),
                np.ones_like(self.bounds[1])
            ]

        self.mean = CubicSpline(bounds[0], bounds[1], orders=self.grid_num)

        if self.var_map == "pca":
            if self.svd_rank == "auto":
                # the rank is only known once the grid values exist
                warnings.warn(
                    "The containers for variance are not built because svd_rank='auto'"
                )
            elif isinstance(self.svd_rank, int):
                self.var = PCASplines(
                    bounds[0],
                    bounds[1],
                    orders=self.grid_num,
                    svd_rank=self.svd_rank,
                )
        elif self.var_map == "simple":
            self.var = CubicSpline(bounds[0], bounds[1], orders=self.grid_num)

    def update_bounds(self, GP):
        """
        Bring the bounds and containers in line with the given GP.

        The containers are rebuilt when the GP's hyps_mask differs from the
        stored one, when the smallest training distance lies below the
        current lower bound (or no lower bound is set), or when an
        automatic upper bound has to be raised to the GP cutoffs.
        """
        # double check the container and the GP is consistent
        rebuild_container = not Parameters.compare_dict(GP.hyps_mask,
                                                        self.hyps_mask)

        lower_bound = self.bounds[0]
        min_dist = self.search_lower_bound(GP)

        # change lower bound only when there appears a smaller distance
        needs_new_lower = lower_bound is None or min_dist < np.max(lower_bound)
        if needs_new_lower:
            lower_bound = np.max((min_dist - self.lower_bound_relax, 0.0))
            rebuild_container = True

            warnings.warn(
                "The minimal distance in training data is lower than "
                f"the current lower bound, will reset lower bound to {lower_bound}"
            )

        upper_bound = self.bounds[1]
        if self.auto_upper or upper_bound is None:
            gp_cutoffs = Parameters.get_cutoff(self.kernel_name, self.species,
                                               GP.hyps_mask)
            if upper_bound is None or np.any(gp_cutoffs > upper_bound):
                upper_bound = gp_cutoffs
                rebuild_container = True

        if not rebuild_container:
            return

        self.set_bounds(lower_bound, upper_bound)
        self.build_map_container()

    def build_map(self, GP):
        """
        Update the bounds, generate the grid values and fill the splines.

        With var_map == "pca" and svd_rank == "auto" the variance container
        is created here, using the smallest dimension of the variance grid
        as rank. Afterwards a copy of GP.hyps_mask is stored.
        """

        self.update_bounds(GP)

        grid_mean, grid_var = self.GenGrid(GP)
        self.mean.set_values(grid_mean)

        auto_rank_pca = self.var_map == "pca" and self.svd_rank == "auto"
        if auto_rank_pca:
            self.var = PCASplines(
                self.bounds[0],
                self.bounds[1],
                orders=self.grid_num,
                svd_rank=np.min(grid_var.shape),
            )

        if self.var_map is not None:
            self.var.set_values(grid_var)

        self.hyps_mask = deepcopy(GP.hyps_mask)

    def __str__(self):
        info = f"""{self.__class__.__name__}
        species: {self.species}
        lower bound: {self.bounds[0]}, auto_lower = {self.auto_lower}
        upper bound: {self.bounds[1]}, auto_upper = {self.auto_upper}
        grid num: {self.grid_num}
        lower bound relaxation: {self.lower_bound_relax}
        load grid from: {self.load_grid}\n"""

        if self.var_map is None:
            info += f"        without variance\n"
        elif self.var_map == "pca":
            info += f"        with PCA variance, svd_rank = {self.svd_rank}\n"
        elif self.var_map == "simple":
            info += f"        with simple variance"

        return info

    def search_lower_bound(self, GP):
        """
        If the lower bound is set to be 'auto', search the minimal interatomic
        distances in the training set of GP.

        The search starts from the smallest cutoff returned by
        Parameters.get_cutoff and considers the first entry of bond_array_2
        of every training environment and of every environment in the
        training structures; environments without bonds are skipped.

        Returns:
            The smallest value found.
        """
        cutoff = Parameters.get_cutoff(self.kernel_name, self.species,
                                       GP.hyps_mask)
        shortest = np.min(cutoff)

        for env in _global_training_data[GP.name]:
            if len(env.bond_array_2) != 0:
                # assumes the first bond is the shortest one -- TODO confirm
                shortest = min(shortest, env.bond_array_2[0][0])

        for struc in _global_training_structures[GP.name]:
            for env in struc:
                if len(env.bond_array_2) != 0:
                    shortest = min(shortest, env.bond_array_2[0][0])

        return shortest

    def predict(self, lengths, xyzs):
        """
        Predict the force, virial, variance and energy contribution of one
        component from the spline maps.

        Args:
            lengths: array-like indexed as ``lengths[i, b]`` with ``b`` in
                ``range(self.bodies - 1)``.
            xyzs: array-like indexed as ``xyzs[i, b, k]`` with ``k`` in
                ``range(3)``; one 3-vector per entry of ``lengths``.

        Returns:
            Tuple ``(f, vir, v, e)``: the force (3 components), the virial
            (6 components, ASE order xx, yy, zz, yz, xz, xy), the variance
            (0 when ``var_map`` is neither "simple" nor "pca") and the
            energy.

        Raises:
            ValueError: the smallest length is below the map's lower bound.
                ``args`` are ``(species, min_dist, message)``.
            Exception: the largest length is above the map's upper bound.
                ``args`` are ``(species, max_dist, message)``. This is kept
                as a plain ``Exception`` so that callers handling the
                ``ValueError`` above do not start catching this case too.
        """

        min_dist = np.min(lengths)
        if min_dist < np.max(self.bounds[0]):
            raise ValueError(
                self.species,
                min_dist,
                f"The minimal distance {min_dist:.3f}"
                f" is below the mgp lower bound {self.bounds[0]}",
            )

        max_dist = np.max(lengths)
        if max_dist > np.min(self.bounds[1]):
            raise Exception(
                self.species,
                max_dist,
                "The atomic environment should have cutoff smaller than the GP cutoff",
            )

        lengths = np.array(lengths)
        xyzs = np.array(xyzs)

        n_neigh = self.bodies - 1
        # predict forces and energy
        e_0, f_0 = self.mean(lengths, with_derivatives=True)
        e = np.sum(e_0)  # energy
        f_d = np.zeros((lengths.shape[0], n_neigh, 3))
        for b in range(n_neigh):
            # Scale each row of xyzs[:, b] by the matching derivative.
            # Broadcasting does this in O(n); building a dense n-by-n
            # diagonal matrix for the same product costs O(n^2).
            f_d[:, b, :] = f_0[:, b, 0][:, np.newaxis] * xyzs[:, b]
        f = self.bodies * np.sum(f_d, axis=(0, 1))

        # predict var
        v = 0
        if self.var_map == "simple":
            v_0 = self.var(lengths)
            v = np.sum(v_0)
        elif self.var_map == "pca":
            # Sum each component over axis 1, then combine the components
            # with the matrix stored on the variance map.
            v_0 = self.var(lengths)
            v_0 = np.sum(v_0, axis=1)
            v_0 = np.expand_dims(v_0, axis=1)
            v = self.var.V @ v_0

        # predict virial stress
        vir = np.zeros(6)
        vir_order = (
            (0, 0),
            (1, 1),
            (2, 2),
            (1, 2),
            (0, 2),
            (0, 1),
        )  # match the ASE order
        for i, (row, col) in enumerate(vir_order):
            for b in range(n_neigh):
                vir_i = f_d[:, b, row] * xyzs[:, b, col] * lengths[:, b]
                vir[i] += np.sum(vir_i)

        vir *= self.bodies / 2
        return f, vir, v, e

    def write(self, f, write_var, permute=False):
        """
        Write LAMMPS coefficient file

        This implementation only works for 2b and 3b. User should
        implement overload in the actual class if the new kernel
        has different coefficient format

        In the future, it should be changed to writing in bin/hex
        instead of decimal

        Args:
            f: writable text file object.
            write_var: if true, write the coefficients of the variance
                map; otherwise write those of the mean map.
            permute: not used by this implementation.
        """

        # write header: element names, lower bounds, upper bounds, grid sizes
        elems = self.species_code.split("_")

        a = self.bounds[0]
        b = self.bounds[1]
        order = self.grid_num

        # Convert to Python floats before repr(): since NumPy 2.0 the repr of
        # a NumPy scalar is e.g. "np.float64(0.5)", which would corrupt the
        # coefficient file.
        header = " ".join(elems)
        header += " " + " ".join(repr(float(x)) for x in a)
        header += " " + " ".join(repr(float(x)) for x in b)
        header += " " + " ".join(map(str, order))
        f.write(header + "\n")

        # write coeffs
        if write_var:
            coefs = self.var.__coeffs__
        else:
            coefs = self.mean.__coeffs__

        self.write_flatten_coeff(f, coefs)

    def write_flatten_coeff(self, f, coefs):
        """
        Flatten the coefficients and write them as one block.

        Each line holds at most 5 values, every value is preceded by a
        single space, and the block always ends with a newline (an empty
        array produces just that newline). Values are written with the
        full ``repr`` precision of a Python float.

        Args:
            f: writable text file object.
            coefs: array of coefficients; any shape, it is flattened.
        """
        flat = coefs.reshape([-1])
        # Convert to Python floats before repr(): since NumPy 2.0 the repr of
        # a NumPy scalar is e.g. "np.float64(0.5)", which would corrupt the
        # coefficient file.
        tokens = [repr(float(coef)) for coef in flat]
        rows = [
            " " + " ".join(tokens[start:start + 5])
            for start in range(0, len(tokens), 5)
        ]
        # One write for the whole block instead of one per coefficient.
        f.write("\n".join(rows) + "\n")