Ejemplo n.º 1
0
    def write(self, f, spc):
        '''
        Write the 3-body map of one species triplet to an open file.

        A single header line is written first, followed by every entry of
        ``self.mean.__coeffs__`` (visited in i, j, k index order), five
        coefficients per line.

        :param f: writable text file object
        :param spc: sequence whose first three entries are species codes
            accepted by Z_to_element
        '''
        first_bounds, second_bounds = self.bounds[0], self.bounds[1]
        orders = self.grid_num

        coefs_3 = self.mean.__coeffs__

        symbols = [Z_to_element(spc[idx]) for idx in range(3)]

        # Header: three element symbols, three entries of each bounds row and
        # three grid orders. Only the very last bound uses exponent notation.
        f.write(
            f'{symbols[0]} {symbols[1]} {symbols[2]}'
            f' {first_bounds[0]} {first_bounds[1]} {first_bounds[2]}'
            f' {second_bounds[0]} {second_bounds[1]} {second_bounds[2]:.10e}'
            f' {orders[0]} {orders[1]} {orders[2]}\n')

        # Walk the 3-d coefficient array with the last index varying fastest.
        n_i, n_j, n_k = coefs_3.shape[0], coefs_3.shape[1], coefs_3.shape[2]
        flattened = (coefs_3[i, j, k]
                     for i in range(n_i)
                     for j in range(n_j)
                     for k in range(n_k))

        for written, coef in enumerate(flattened, start=1):
            f.write(f'{coef:.10e} ')
            # Break the line after every fifth coefficient.
            if written % 5 == 0:
                f.write('\n')

        f.write('\n')
Ejemplo n.º 2
0
    def __init__(self,
                 grid_num: int,
                 bounds,
                 bond_struc: Structure,
                 svd_rank=0,
                 mean_only: bool = False,
                 n_cpus: int = None,
                 n_sample: int = 100):
        '''
        Build 2-body MGP

        Stores the configuration on the instance, derives ``species_code``
        from the first two coded species of ``bond_struc`` and then calls
        ``build_map_container``.

        :param grid_num: stored as ``self.grid_num``
        :param bounds: stored as ``self.bounds``
        :param bond_struc: Mock structure used to sample 2-body forces on
            2 atoms
        :param svd_rank: stored as ``self.svd_rank``
        :param mean_only: stored as ``self.mean_only``
        :param n_cpus: stored as ``self.n_cpus``
        :param n_sample: stored as ``self.n_sample``
        '''

        # Grid / map configuration.
        self.grid_num, self.bounds = grid_num, bounds
        self.svd_rank, self.mean_only = svd_rank, mean_only

        # Sampling structure and parallelisation settings.
        self.bond_struc = bond_struc
        self.n_cpus, self.n_sample = n_cpus, n_sample

        # Label of the form "<elem1>_<elem2>".
        spc = bond_struc.coded_species
        self.species_code = f'{Z_to_element(spc[0])}_{Z_to_element(spc[1])}'

        self.build_map_container()
Ejemplo n.º 3
0
    def __init__(self,
                 grid_num,
                 bounds,
                 bond_struc: Structure,
                 svd_rank: int = 0,
                 mean_only: bool = False,
                 load_grid: str = '',
                 update: bool = True,
                 n_cpus=None,
                 n_sample=100):
        '''
        Build 3-body MGP

        Stores the configuration on the instance, derives ``species_code``
        and ``kv3name`` from the first three coded species of ``bond_struc``
        and then calls ``build_map_container``.

        :param grid_num: stored as ``self.grid_num``
        :param bounds: stored as ``self.bounds``
        :param bond_struc: Mock Structure object which contains 3 atoms to
            get map from
        :param svd_rank: stored as ``self.svd_rank``
        :param mean_only: stored as ``self.mean_only``
        :param load_grid: stored as ``self.load_grid``
        :param update: stored as ``self.update``
        :param n_cpus: stored as ``self.n_cpus``
        :param n_sample: stored as ``self.n_sample``
        '''
        # Grid / map configuration.
        self.grid_num, self.bounds = grid_num, bounds
        self.svd_rank, self.mean_only = svd_rank, mean_only
        self.load_grid, self.update = load_grid, update

        # Sampling structure and sample count.
        self.bond_struc = bond_struc
        self.n_sample = n_sample

        # Label of the form "<elem1>_<elem2>_<elem3>".
        spc = bond_struc.coded_species
        self.species_code = '_'.join(
            [Z_to_element(spc[0]), Z_to_element(spc[1]),
             Z_to_element(spc[2])])
        self.kv3name = f'kv3_{self.species_code}'

        self.build_map_container()

        # n_cpus is only set once the container has been built.
        self.n_cpus = n_cpus
        # NOTE(review): bounds and mean_only are assigned a second time here;
        # this looks redundant unless build_map_container mutates them --
        # confirm before removing.
        self.bounds = bounds
        self.mean_only = mean_only
Ejemplo n.º 4
0
def test_to_xyz(varied_test_struc):
    """Check both the plain and the fully extended output of ``to_xyz``.

    The output is expected to hold two header lines, one line per atom and
    a trailing newline, hence ``number_of_lines - 3`` atoms after splitting.
    """
    struc = varied_test_struc

    # --- Plain output: species symbol followed by the three coordinates ---
    plain_lines = struc.to_xyz(extended_xyz=False, print_stds=False,
                               print_forces=False,
                               print_max_stds=False).split('\n')

    assert len(plain_lines) - 3 == len(struc)

    for i, atom_line in enumerate(plain_lines[2:-1]):
        tokens = atom_line.split()
        assert tokens[0] == Z_to_element(int(struc.species_labels[i]))
        for axis in range(3):
            assert float(tokens[axis + 1]) == struc.positions[i][axis]

    # --- Extended output with stds, forces and max std all switched on ---
    full_lines = struc.to_xyz(True, True, True, True).split('\n')

    assert len(full_lines) - 3 == len(struc)

    for i, atom_line in enumerate(full_lines[2:-1]):
        tokens = atom_line.split()
        assert tokens[0] == Z_to_element(int(struc.species_labels[i]))

        # Columns 1-3: position, 4-6: stds, 7-9: forces.
        column_groups = ((1, struc.positions),
                         (4, struc.stds),
                         (7, struc.forces))
        for first_column, values in column_groups:
            for axis in range(3):
                assert float(tokens[first_column + axis]) == values[i][axis]

        # Column 10: largest std component of the atom.
        assert float(tokens[10]) == np.max(struc.stds[i])
Ejemplo n.º 5
0
def test_Z_to_element():
    """Check Z_to_element on valid atomic numbers and on invalid input."""
    # Every atomic number from 1 through 118 inclusive must map to a string.
    # The upper bound is 119 because range() excludes its stop value and
    # Z=118 ('Og') is asserted to be supported just below.
    for atomic_number in range(1, 119):
        assert isinstance(Z_to_element(atomic_number), str)

    # Both integers and numeric strings are accepted.
    for code, symbol in zip([1, 6, '8', '118'], ['H', 'C', 'O', 'Og']):
        assert Z_to_element(code) == symbol

    # A non-numeric string is rejected.
    with raises(ValueError):
        Z_to_element('a')
Ejemplo n.º 6
0
    def write_two_body(self, f):
        '''
        Write every 2-body map to an open file.

        For each entry of ``self.spcs[0]`` a header line (two element
        symbols, the two bound values and the grid order) is written,
        followed by the coefficients of the matching map in ``self.maps_2``,
        five per line.

        :param f: writable text file object
        '''
        first_bound = self.bounds_2[0][0]
        second_bound = self.bounds_2[1][0]
        order = self.grid_num_2

        for ind, spc in enumerate(self.spcs[0]):
            coefs_2 = self.maps_2[ind].mean.__coeffs__

            f.write(f'{Z_to_element(spc[0])} {Z_to_element(spc[1])}'
                    f' {first_bound} {second_bound} {order}\n')

            for c, coef in enumerate(coefs_2):
                # Start a new line before every sixth, eleventh, ...
                # coefficient, so no extra break follows the last one.
                if c and c % 5 == 0:
                    f.write('\n')
                f.write(f'{coef:.10e} ')

            f.write('\n')
Ejemplo n.º 7
0
    def write(self, f, spc):
        '''
        Write LAMMPS coefficient file

        A header line (two element symbols, the two bound values and the
        grid order) is followed by the entries of ``self.mean.__coeffs__``,
        five per line.

        :param f: writable text file object
        :param spc: sequence whose first two entries are species codes
            accepted by Z_to_element
        '''
        first_bound, second_bound = self.bounds[0][0], self.bounds[1][0]
        order = self.grid_num

        coefs_2 = self.mean.__coeffs__

        f.write(f'{Z_to_element(spc[0])} {Z_to_element(spc[1])}'
                f' {first_bound} {second_bound} {order}\n')

        final_index = len(coefs_2) - 1
        for c, coef in enumerate(coefs_2):
            chunk = f'{coef:.10e} '
            # Break after every fifth coefficient, except after the last one
            # (the closing newline below already ends that line).
            if c % 5 == 4 and c != final_index:
                chunk += '\n'
            f.write(chunk)

        f.write('\n')
Ejemplo n.º 8
0
    def update_gp_and_print(self,
                            frame: Structure,
                            train_atoms: List[int],
                            uncertainties: List[int] = None,
                            train: bool = True):
        """
        Update the internal GP model training set with a list of training
        atoms indexing atoms within the frame. If train is True, re-train
        the GP by optimizing hyperparameters.

        :param frame: Structure to train on
        :param train_atoms: Index atoms to train on
        :param uncertainties: Uncertainties to print. ``None`` (default)
            prints ``frame.stds`` of the training atoms; pass in [] to
            silence; any other value is printed as given.
        :param train: Train or not
        :return: None
        :raises NotImplementedError: if ``self.mgp`` is truthy
        """

        # Group added atoms by species for easier output. Keys are inserted
        # in first-seen order, so the logged dictionary is deterministic.
        added_atoms = {}
        for atom in train_atoms:
            symbol = Z_to_element(frame.coded_species[atom])
            added_atoms.setdefault(symbol, []).append(atom)

        if self.verbose:
            self.output.write_to_log(
                '\nAdding atom(s) '
                f'{json.dumps(added_atoms,cls=NumpyEncoder)}'
                ' to the training set.\n')

        # Only fall back to the frame's own uncertainties when the caller
        # supplied nothing; explicitly passed values (including the empty
        # list used to silence the output) must not be overwritten.
        if uncertainties is None:
            uncertainties = frame.stds[train_atoms]

        if self.verbose and len(uncertainties) != 0:
            self.output.write_to_log(f'Uncertainties: '
                                     f'{uncertainties}.\n',
                                     flush=True)

        # update gp model; handling differently if it's an MGP
        if self.mgp:
            raise NotImplementedError

        self.gp.update_db(frame, frame.forces, custom_range=train_atoms)

        if train:
            self.train_gp()
Ejemplo n.º 9
0
    def training_statistics(self) -> dict:
        """
        Summarize the current training data of the GP.

        :return: dictionary with the keys ``'N'`` (number of training
            environments), ``'species'`` (distinct element symbols present)
            and ``'envs_by_species'`` (environment count per symbol)
        """
        n_envs = len(self.training_data)

        # Element symbol of the central atom of every environment. Pairing
        # with the labels keeps the count limited to the shorter of the two
        # sequences.
        present_species = [
            Z_to_element(env.structure.coded_species[env.atom])
            for env, _ in zip(self.training_data, self.training_labels)
        ]

        return {
            'N': n_envs,
            'species': list(set(present_species)),
            'envs_by_species': dict(Counter(present_species)),
        }
Ejemplo n.º 10
0
    def GenGrid(self, GP):
        '''
        To use GP to predict value on each grid point, we need to generate the
        kernel vector kv whose length is the same as the training set size.

        1. We divide the training set into several batches, corresponding to
           different segments of kv
        2. Distribute each batch to a processor, i.e. each processor calculate
           the kv segment of one batch for all grids
        3. Collect kv segments and form a complete kv vector for each grid,
           and calculate the grid value by multiplying the complete kv vector
           with GP.alpha

        Besides returning the grids, the mean and variance arrays are saved
        with np.save under the names 'grid2_mean' / 'grid2_var' followed by
        '_<element>' for every coded species of self.bond_struc.

        :param GP: GP object; this method reads GP.alpha, GP.cutoffs,
            GP.training_data, GP.name and (unless self.mean_only) GP.l_mat
        :return: tuple (bond_means, bond_vars); bond_vars is None when
            self.mean_only is set
        '''

        kernel_info = get_2bkernel(GP)

        # Use every available core unless a worker count was configured.
        if (self.n_cpus is None):
            processes = mp.cpu_count()
        else:
            processes = self.n_cpus

        # ------ construct grids ------
        nop = self.grid_num
        # Evenly spaced grid points between the two bound values.
        bond_lengths = np.linspace(self.bounds[0][0], self.bounds[1][0], nop)
        bond_means = np.zeros([nop])
        if not self.mean_only:
            # One row per grid point, one column per entry of GP.alpha.
            bond_vars = np.zeros([nop, len(GP.alpha)])
        else:
            bond_vars = None
        # Environment of atom 0 of the mock structure.
        env12 = AtomicEnvironment(self.bond_struc, 0, GP.cutoffs)

        with mp.Pool(processes=processes) as pool:
            # A_list = pool.map(self._GenGrid_inner_most, pool_list)
            # break it into pieces
            size = len(GP.training_data)
            # block_id[i] unpacks to the (start, end) pair of batch i.
            block_id, nbatch = partition_c(self.n_sample, size, processes)

            # Pending async results of the current window of batches.
            k12_slice = []
            # Three columns per training environment
            # (presumably the three force components -- TODO confirm).
            k12_v_all = np.zeros([len(bond_lengths), size * 3])
            # count: number of pending results; base: index of the first
            # batch that belongs to the current window.
            count = 0
            base = 0
            for ibatch in range(nbatch):
                s, e = block_id[ibatch]
                k12_slice.append(
                    pool.apply_async(self._GenGrid_inner,
                                     args=(GP.name, s, e, bond_lengths, env12,
                                           kernel_info)))
                count += 1
                # Once more than 2 * processes results are pending, collect
                # them into their column ranges and start a new window.
                if (count > processes * 2):
                    for ibase in range(count):
                        s, e = block_id[ibase + base]
                        k12_v_all[:, s * 3:e * 3] = k12_slice[ibase].get()
                    del k12_slice
                    k12_slice = []
                    count = 0
                    base = ibatch + 1
            # Collect whatever is still pending after the last batch.
            if (count > 0):
                for ibase in range(count):
                    s, e = block_id[ibase + base]
                    k12_v_all[:, s * 3:e * 3] = k12_slice[ibase].get()
                del k12_slice
            pool.close()
            pool.join()

        # Evaluate mean (and variance contribution) for every grid point from
        # its complete kernel vector. The loop value r itself is not used.
        for b, r in enumerate(bond_lengths):
            k12_v = k12_v_all[b, :]
            bond_means[b] = np.matmul(k12_v, GP.alpha)
            if not self.mean_only:
                bond_vars[b, :] = solve_triangular(GP.l_mat, k12_v, lower=True)

        # File name suffix of the form "_<elem1>_<elem2>...".
        write_species_name = ''
        for x in self.bond_struc.coded_species:
            write_species_name += "_" + Z_to_element(x)
        # ------ save mean and var to file -------
        np.save('grid2_mean' + write_species_name, bond_means)
        # NOTE(review): bond_vars is None when self.mean_only is set, yet it
        # is still passed to np.save -- confirm this is intended.
        np.save('grid2_var' + write_species_name, bond_vars)

        return bond_means, bond_vars
Ejemplo n.º 11
0
    def to_xyz(self,
               extended_xyz: bool = True,
               print_stds: bool = False,
               print_forces: bool = False,
               print_max_stds: bool = False,
               write_file: str = '') -> str:
        """
        Convenience function which turns a structure into an extended .xyz
        file; useful for further input into visualization programs like VESTA
        or Ovito. Can be saved to an output file via write_file.

        The output holds the atom count, one header line (empty unless
        extended_xyz is set) and one line per atom. The per-atom extras are
        only written when extended_xyz is set.

        :param extended_xyz: Write the Lattice/Properties header line and
            allow the optional per-atom columns below.
        :param print_stds: Print the stds associated with the structure.
        :param print_forces: Print the forces associated with the structure.
        :param print_max_stds: Print the largest std component of each atom.
        :param write_file: If non-empty, path the string is also written to.
        :return: The xyz representation as a string.
        """
        species_list = [Z_to_element(x) for x in self.coded_species]

        # The optional columns are only emitted in extended mode.
        show_stds = extended_xyz and print_stds
        show_forces = extended_xyz and print_forces
        show_max_stds = extended_xyz and print_max_stds

        # Collected output lines; joined with newlines at the end.
        lines = [f'{len(self.coded_species)} ']

        # Add header line with info about lattice and properties if extended
        #  xyz option is called.
        if extended_xyz:
            cell = self.cell

            header = f'Lattice="{cell[0,0]} {cell[0,1]} {cell[0,2]}'
            header += f' {cell[1,0]} {cell[1,1]} {cell[1,2]}'
            header += f' {cell[2,0]} {cell[2,1]} {cell[2,2]}"'
            # The key must be spelled "Properties" and its value is not
            # quoted, otherwise readers of the format cannot parse it.
            header += ' Properties=species:S:1:pos:R:3'

            if print_stds:
                header += ':stds:R:3'
            if print_forces:
                header += ':forces:R:3'
            if print_max_stds:
                header += ':max_std:R:1'
            lines.append(header)
        else:
            lines.append('')

        # stds are needed for both the std columns and the max-std column, so
        # fetch them whenever either is requested.
        stds = self.stds if (show_stds or show_max_stds) else None
        forces = self.forces if show_forces else None

        for i, pos in enumerate(self.positions):
            # Write positions
            line = f"{species_list[i]} {pos[0]} {pos[1]} {pos[2]}"

            # If extended XYZ: Add in extra information
            if show_stds:
                line += f" {stds[i,0]} {stds[i,1]} {stds[i,2]}"
            if show_forces:
                line += f" {forces[i,0]} {forces[i,1]} {forces[i,2]}"
            if show_max_stds:
                line += f" {np.max(stds[i,:])} "
            lines.append(line)

        # Every line, including the last one, is newline-terminated.
        xyz_str = '\n'.join(lines) + '\n'

        # Write to file, optionally
        if write_file:
            with open(write_file, 'w') as f:
                f.write(xyz_str)

        return xyz_str
Ejemplo n.º 12
0
    def write_gp_dft_comparison(self,
                                curr_step,
                                frame,
                                start_time,
                                dft_forces,
                                error,
                                local_energies=None,
                                KE=None):
        """
        Write the comparison between GP and DFT forces to the log file and a
        one-line summary to the stat file.

        :param curr_step: step number printed in the frame header
        :param frame: object providing positions, forces, stds,
            species_labels, coded_species and nat
        :param start_time: reference time; the elapsed wall time is
            ``time.time() - start_time``
        :param dft_forces: DFT forces, indexed ``[atom][component]``
        :param error: per-atom, per-component errors, indexed
            ``[atom, component]``
        :param local_energies: if not None, summed to the potential energy
        :param KE: kinetic energy added to the potential energy; only used
            when local_energies is given
        :return: None
        """

        def triple(vec):
            # Three components of one vector, each followed by a space.
            return ''.join(f"{vec[j]:10.3} " for j in range(3))

        # Mark if a frame had DFT forces with an asterisk, then the header.
        log_parts = [
            f"\n*-Frame: {curr_step}",
            '\nEl  Position (A) \t\t\t\t ',
            'GP Force (ev/A)  \t\t\t\t',
            'Std. Dev (ev/A) \t\t\t\t',
            'DFT Force (ev/A)  \t\t\t\t \n',
        ]

        # Atom-by-atom description: label, position, GP force, std, DFT force.
        for i in range(len(frame.positions)):
            log_parts.append(
                f"{frame.species_labels[i]} "
                + triple(frame.positions[i]) + '\t'
                + triple(frame.forces[i]) + '\t'
                + triple(frame.stds[i]) + '\t'
                + triple(dft_forces[i]) + '\n')

        log_parts.append('\n')

        self.write_xyz_config(curr_step, frame, True)
        self.write_xyz(curr_step, frame.stds, frame.species_labels, "std",
                       "* ")

        # Overall statistics, converted by the factor 1000 to meV/A.
        mae = np.mean(error) * 1000
        mac = np.mean(np.abs(dft_forces)) * 1000
        log_parts.append(f'mean absolute error: {mae:.2f} meV/A\n')
        log_parts.append(f'mean absolute dft component: {mac:.2f} meV/A\n')
        stat_parts = [f'{curr_step} {mae:.2} {mac:.2}']

        # Per-species accumulation of the summed error and the atom count.
        species = [Z_to_element(Z) for Z in set(frame.coded_species)]
        mae_ps = dict.fromkeys(species, 0)
        count_ps = dict.fromkeys(species, 0)
        for atom in range(frame.nat):
            ele = Z_to_element(frame.coded_species[atom])
            mae_ps[ele] += np.sum(error[atom, :])
            count_ps[ele] += 1

        log_parts.append("mae per species\n")
        for ele in species:
            if count_ps[ele] > 0:
                # Average over atoms and the 3 components, then put in meV/A.
                mae_ps[ele] = mae_ps[ele] / (count_ps[ele] * 3)
                mae_ps[ele] = mae_ps[ele] * 1000
                log_parts.append(
                    f"type {ele} mae: {mae_ps[ele]:.2f} meV/A\n")
            stat_parts.append(f' {mae_ps[ele]:.2f}')

        # calculate potential and total energy
        if local_energies is not None:
            pot_en = np.sum(local_energies)
            tot_en = KE + pot_en
            log_parts.append(f'potential energy: {pot_en:10.6} eV\n')
            log_parts.append(f'total energy: {tot_en:10.6} eV \n')
            stat_parts.append(f' {pot_en:10.6} {tot_en:10.6}')

        dt = time.time() - start_time
        log_parts.append(f'wall time from start: {dt:10.2}\n')
        stat_parts.append(f' {dt}\n')

        self.outfiles['log'].write(''.join(log_parts))
        self.outfiles['stat'].write(''.join(stat_parts))

        if self.always_flush:
            self.outfiles['log'].flush()
Ejemplo n.º 13
0
    def write_gp_dft_comparison(self, curr_step, frame,
                                start_time, dft_forces,
                                error, local_energies=None, KE=None,
                                mgp= False):
        """ write the comparison to logfile

        :param curr_step: current timestep
        :param frame: Structure object that contain the current GP
            calculation results
        :param start_time: start time for time profiling
        :param dft_forces: list of forces computed by DFT
        :param error: list of force differences between DFT and GP prediction
        :param local_energies: local atomic energy
        :param KE: total kinetic energy
        :param mgp: if truthy, the force column header reads "MGP Force"
            instead of "GP Force"

        :return:
        """

        def triple(vec):
            # Three components of one vector, each followed by a space.
            return ''.join(f"{vec[j]:10.5} " for j in range(3))

        # Mark if a frame had DFT forces with an asterisk, then the header.
        log_parts = [
            f"\n*-Frame: {curr_step}",
            '\nEl  Position (A) \t\t\t\t ',
        ]
        if mgp:
            log_parts.append('M')
        log_parts.extend([
            'GP Force (ev/A)  \t\t\t\t',
            'Std. Dev (ev/A) \t\t\t\t',
            'DFT Force (ev/A)  \t\t\t\t \n',
        ])

        # Atom-by-atom description: label, position, GP force, std, DFT force.
        for i in range(len(frame.positions)):
            log_parts.append(
                f"{frame.species_labels[i]} "
                + triple(frame.positions[i]) + '\t'
                + triple(frame.forces[i]) + '\t'
                + triple(frame.stds[i]) + '\t'
                + triple(dft_forces[i]) + '\n')

        log_parts.append('\n')

        # Overall statistics (NaN entries of error are ignored by nanmean),
        # converted by the factor 1000 to meV/A.
        mae = np.nanmean(error) * 1000
        mac = np.mean(np.abs(dft_forces)) * 1000
        log_parts.append(f'mean absolute error: {mae:.2f} meV/A\n')
        log_parts.append(f'mean absolute dft component: {mac:.2f} meV/A\n')
        stat_parts = [f'{curr_step} {mae:.2} {mac:.2}']

        # Per-species accumulation of the summed error and the atom count.
        species = [Z_to_element(Z) for Z in set(frame.coded_species)]
        mae_per_species = dict.fromkeys(species, 0)
        count_per_species = dict.fromkeys(species, 0)

        for atom in range(frame.nat):
            ele = Z_to_element(frame.coded_species[atom])
            atom_error = np.sum(error[atom, :])
            # Atoms whose summed error is NaN do not enter the statistics.
            if np.isnan(atom_error):
                continue
            mae_per_species[ele] += atom_error
            count_per_species[ele] += 1

        log_parts.append("mae per species\n")
        for ele in species:
            if count_per_species[ele] > 0:
                # Average over atoms and the 3 components, then put in meV/A.
                mae_per_species[ele] = \
                    mae_per_species[ele] / (count_per_species[ele] * 3)
                mae_per_species[ele] = mae_per_species[ele] * 1000
                log_parts.append(
                    f"type {ele} mae: {mae_per_species[ele]:.2f} meV/A\n")
            stat_parts.append(f' {mae_per_species[ele]:.2f}')

        # calculate potential and total energy
        if local_energies is not None:
            pot_en = np.sum(local_energies)
            tot_en = KE + pot_en
            log_parts.append(f'potential energy: {pot_en:10.6} eV\n')
            log_parts.append(f'total energy: {tot_en:10.6} eV \n')
            stat_parts.append(f' {pot_en:10.6} {tot_en:10.6}')

        dt = time.time() - start_time
        log_parts.append(f'wall time from start: {dt:10.2}\n')
        stat_parts.append(f' {dt}\n')

        self.outfiles['log'].write(''.join(log_parts))
        # NOTE: the stat line is assembled above but is not written to any
        # file in this method.
        stat = ''.join(stat_parts)

        if self.always_flush:
            self.outfiles['log'].flush()