def evaluate(self, test_structures, ref_energies, ref_forces, ref_stresses):
    """
    Predict forces for the test structures and pair them with reference data.

    Only the forces come from the model in this method: every predicted
    record stores an energy of 0 and a virial stress of six zeros as
    placeholders.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies of
            each structure in the structures list.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
            each structure with m atoms in the structures list. m can
            vary from structure to structure.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses
            of each structure in the structures list.

    Returns:
        Tuple ``(df_orig, df_pred)``: the second value returned by
        ``convert_docs`` for the reference pool and for the predicted
        pool, respectively.
    """
    reference_pool = pool_from(test_structures, ref_energies,
                               ref_forces, ref_stresses)
    _, df_orig = convert_docs(reference_pool)

    predicted_pool = []
    for structure in test_structures:
        structure_dict = structure.as_dict()
        atom_count = len(structure)
        descriptors = self.describer.describe(structure)
        predicted = self.predictor.predict(descriptors.values)
        predicted_pool.append({
            'outputs': {
                'energy': 0,
                # The flat prediction is viewed as one (x, y, z) row per atom.
                'forces': predicted.reshape((-1, 3)),
                'virial_stress': [0., 0., 0., 0., 0., 0.],
            },
            'structure': structure_dict,
            'num_atoms': atom_count,
        })

    _, df_pred = convert_docs(predicted_pool)
    return df_orig, df_pred
def evaluate(self, test_structures, ref_energies, ref_forces, ref_stresses):
    """
    Evaluate the test structures with the trained model.

    The model output is multiplied by the ``'n'`` column of the converted
    prediction frame and stored in its ``'y_orig'`` column.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies of
            each structure in the structures list.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
            each structure with m atoms in the structures list. m can
            vary from structure to structure.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses
            of each structure in the structures list.

    Returns:
        Tuple ``(df_orig, df_predict)``: the converted reference data and
        the converted prediction data.
    """
    reference_pool = pool_from(test_structures, ref_energies,
                               ref_forces, ref_stresses)
    _, df_orig = convert_docs(reference_pool)

    # A pool built from the structures alone provides the frame layout
    # that the model output is written into.
    structure_only_pool = pool_from(test_structures)
    _, df_predict = convert_docs(structure_only_pool)

    model_outputs = self.model.predict(inputs=test_structures, override=True)
    df_predict['y_orig'] = df_predict['n'] * model_outputs
    return df_orig, df_predict
def train(self, train_structures, energies, forces, stresses=None, **kwargs):
    """
    Fit the model on the given training data.

    The fitting target is the ``'y_orig'`` column of the converted
    training frame divided by its ``'n'`` column. After fitting, the
    first symbol of the first training structure is stored as
    ``self.specie``.

    Args:
        train_structures ([Structure]): The list of Pymatgen Structure
            objects.
        energies ([float]): List of total energies of each structure in
            the structures list.
        forces ([np.array]): List of (m, 3) forces arrays of each
            structure with m atoms in the structures list. m can vary
            from structure to structure.
        stresses (list): List of (6, ) virial stresses of each structure
            in the structures list.
        **kwargs: Forwarded unchanged to ``self.model.fit``.
    """
    _, df = convert_docs(pool_from(train_structures, energies, forces, stresses))
    normalized_targets = df['y_orig'] / df['n']
    self.model.fit(inputs=train_structures, outputs=normalized_targets, **kwargs)

    first_symbol = train_structures[0].symbol_set[0]
    self.specie = Element(first_symbol)
def test_convert_docs(self):
    """
    Check that convert_docs reproduces the fixture energies, forces and,
    when requested, stresses, both with and without ``include_stress``.
    """

    def column_for(frame, dtype):
        # Select the 'y_orig' values of the rows tagged with this dtype.
        return frame[frame['dtype'] == dtype]['y_orig']

    def check_flat(values, reference):
        # Compare element by element against the flattened reference.
        flat_reference = np.array(reference).ravel()
        for got, expected in zip(values, flat_reference):
            self.assertEqual(got, expected)

    for include_stress in (False, True):
        _, frame = convert_docs(self.test_pool, include_stress=include_stress)

        energies = column_for(frame, 'energy')
        self.assertFalse(np.any(energies - self.test_energies))

        check_flat(column_for(frame, 'force'), self.test_forces)

        if include_stress:
            check_flat(column_for(frame, 'stress'), self.test_stresses)
def evaluate2(self, test_structures, ref_energies=None, ref_forces=None, ref_stresses=None):
    """
    Evaluate energies, forces and stresses of structures through an
    ``EnergyForceStress`` calculator configured with this object.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies of
            each structure in the structures list.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
            each structure with m atoms in the structures list. m can
            vary from structure to structure.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses
            of each structure in the structures list.

    Returns:
        Tuple ``(df_orig, df_pred)``: the converted reference data and
        the converted calculator results.
    """
    reference_pool = pool_from(test_structures, ref_energies,
                               ref_forces, ref_stresses)
    _, df_orig = convert_docs(reference_pool)

    calculator = EnergyForceStress(ff_settings=self)
    results = calculator.calculate(test_structures)
    # One (energy, forces, stresses) result is expected per structure.
    assert len(test_structures) == len(results)

    predicted_pool = [
        {
            'outputs': {
                'energy': energy,
                'forces': forces,
                'virial_stress': stresses,
            },
            'structure': structure.as_dict(),
            'num_atoms': len(structure),
        }
        for structure, (energy, forces, stresses)
        in zip(test_structures, results)
    ]

    _, df_pred = convert_docs(predicted_pool)
    return df_orig, df_pred
def read_cfgs(self, filename, symbol):
    """
    Parse a file of ``BEGIN_CFG`` ... ``END_CFG`` blocks into a data pool.

    Args:
        filename (str): The configuration file to be read.
        symbol (str): The element symbol assigned to every site.

    Returns:
        Tuple ``(data_pool, df)``: the list of per-configuration dicts
        (keys ``'structure'``, ``'num_atoms'`` and ``'outputs'``) and the
        second value returned by ``convert_docs`` for that list.
    """
    with zopen(filename, 'rt') as f:
        content = f.read()

    block_pattern = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
    size_pattern = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
    lattice_pattern = re.compile('SuperCell\n(.*?)\n AtomData', re.S | re.I)
    position_pattern = re.compile('fz\n(.*?)\n Energy', re.S)
    energy_pattern = re.compile('Energy\n(.*?)\n Stress', re.S)
    stress_pattern = re.compile('xy\n(.*?)(?=\n|$)', re.S)

    def to_rows(text):
        # One list of floats per newline-separated row.
        return [[float(tok) for tok in row.split()]
                for row in text.split('\n')]

    data_pool = []
    for block in block_pattern.findall(content):
        size = int(size_pattern.findall(block)[0].lstrip())

        lattice = Lattice(np.array(to_rows(lattice_pattern.findall(block)[0])))

        atom_table = np.array(to_rows(position_pattern.findall(block)[0]))
        # Columns 2-4 are used as coordinates, columns 5-7 as forces.
        forces = atom_table[:, 5:8].tolist()
        coords = atom_table[:, 2:5]

        energy = float(energy_pattern.findall(block)[0].lstrip())

        stress_tokens = stress_pattern.findall(block)[0].split()
        raw_stress = np.array(
            [float(tok) for tok in stress_tokens]).reshape(6, ).tolist()
        # Reorder from this file's component order to vasp_stress_order.
        virial_stress = [raw_stress[self.stress_order.index(component)]
                         for component in self.vasp_stress_order]

        struct = Structure(lattice=lattice, species=[symbol] * size,
                           coords=coords, coords_are_cartesian=True)
        structure_dict = struct.as_dict()
        assert size == struct.num_sites

        data_pool.append({
            'outputs': {
                'energy': energy,
                'forces': forces,
                'virial_stress': virial_stress,
            },
            'structure': structure_dict,
            'num_atoms': size,
        })

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename='output.data'):
    """
    Parse a file of ``begin`` ... ``end`` blocks into a data pool.

    Lattice vectors and coordinates are multiplied by
    ``self.bohr_to_angstrom``; energies are divided by ``self.eV_to_Ha``
    and forces by both factors (presumably converting atomic units to
    eV / Angstrom, as the attribute names suggest).

    Args:
        filename (str): The configuration file to be read.

    Returns:
        Tuple ``(data_pool, df)``: the list of per-configuration dicts
        (keys ``'structure'``, ``'num_atoms'`` and ``'outputs'`` with
        ``'energy'`` and ``'forces'``) and the second value returned by
        ``convert_docs`` for that list.
    """
    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('begin\n(.*?)end', re.S)
    lattice_pattern = re.compile('lattice(.*?)\n')
    position_pattern = re.compile('atom(.*?)\n')
    energy_pattern = re.compile('energy(.*?)\n')

    for block in block_pattern.findall(lines):
        d = {'outputs': {}}

        # The builtin ``float`` replaces the ``np.float`` alias, which was
        # removed in NumPy 1.24; the resulting dtype is the same.
        lattice_str = lattice_pattern.findall(block)
        lattice = Lattice(
            np.array([latt.split() for latt in lattice_str], dtype=float)
            * self.bohr_to_angstrom)

        position_str = position_pattern.findall(block)
        positions = pd.DataFrame([pos.split() for pos in position_str])
        positions.columns = \
            ['x', 'y', 'z', 'specie', 'charge', 'atomic_energy',
             'fx', 'fy', 'fz']

        coords = np.array(positions.loc[:, ['x', 'y', 'z']], dtype=float)
        coords = coords * self.bohr_to_angstrom
        species = np.array(positions['specie'])

        forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']], dtype=float)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        energy_str = energy_pattern.findall(block)[0]
        energy = float(energy_str.lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species, coords=coords,
                           coords_are_cartesian=True)
        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        d['outputs']['forces'] = forces
        d['num_atoms'] = len(struct)
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename, predict=False):
    """
    Parse a multi-configuration text file into a data pool.

    Each configuration starts with a line holding the number of atoms,
    followed by a header line carrying ``Lattice="..."``, an energy, a
    ``dft_virial`` entry and a ``properties=`` column specification, and
    then one line per atom.

    Args:
        filename (str): The configuration file to be read.
        predict (bool): If True, take forces from the ``'force'`` column
            group; otherwise take them from ``'dft_force'``.

    Returns:
        Tuple ``(data_pool, df)``: the list of per-configuration dicts
        (keys ``'structure'``, ``'num_atoms'`` and ``'outputs'``) and the
        second value returned by ``convert_docs`` for that list.
    """
    # Builtin ``int`` / ``str`` replace the ``np.int`` / ``np.str``
    # aliases, which were removed in NumPy 1.24.
    type_convert = {'R': np.float32, 'I': int, 'S': str}

    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    # Strip every "AT " marker before parsing.
    repl = re.compile('AT ')
    lines = repl.sub('', string=lines)

    block_pattern = re.compile(
        '(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
    lattice_pattern = re.compile('Lattice="(.+)"')
    # Patterns containing ``\S`` are raw strings so the backslash is not
    # treated as an (invalid) string escape; the pattern text is unchanged.
    energy_pattern = re.compile(
        r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
    stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
    properties_pattern = re.compile(r'properties=(\S+)', re.I)
    position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

    for (size, block) in block_pattern.findall(lines):
        d = {'outputs': {}}
        size = int(size)

        lattice_str = lattice_pattern.findall(block)[0]
        lattice = Lattice([float(s) for s in lattice_str.split()])

        # When several energies match, the last one is used.
        energy_str = energy_pattern.findall(block)[-1]
        energy = float(energy_str)

        # Group 1 of the stress match holds the numbers between the
        # optional braces.
        stress_str = stress_pattern.findall(block)[0][1]
        virial_stress = np.array([float(s) for s in stress_str.split()])
        # Pick six entries out of the flat list of stress values.
        virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]

        # ``properties`` is a flat "name:type:ncols:..." specification.
        properties = properties_pattern.findall(block)[0].split(":")
        labels_columns = OrderedDict()
        for i in range(0, len(properties), 3):
            labels_columns[properties[i]] = [
                int(properties[i + 2]), properties[i + 1]
            ]

        position_str = position_pattern.findall(block)[0].split('\n')
        position = np.array([p.split() for p in position_str])

        # Slice the per-atom table into named, typed column groups.
        labels = {}
        column_index = 0
        for key, (num_columns, dtype) in labels_columns.items():
            labels[key] = position[
                :, column_index:column_index + num_columns
            ].astype(type_convert[dtype])
            column_index += num_columns

        struct = Structure(lattice=lattice,
                           species=labels['species'].ravel(),
                           coords=labels['pos'],
                           coords_are_cartesian=True)
        if predict:
            forces = labels['force']
        else:
            forces = labels['dft_force']

        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        assert size == struct.num_sites
        d['num_atoms'] = size
        d['outputs']['forces'] = forces
        d['outputs']['virial_stress'] = virial_stress
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df