def evaluate(self, test_structures, ref_energies, ref_forces, ref_stresses):
    """
    Compare reference data against the predictions of the trained model.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies of
            each structure in structures list.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
            each structure with m atoms in structures list. m can be
            varied with each single structure case.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses
            of each structure in structures list.

    Returns:
        tuple: ``(df_orig, df_predict)``. ``df_orig`` is the second item
        returned by ``convert_docs`` for the pool built from the reference
        data. ``df_predict`` is the same for a pool built from the
        structures alone, with its ``'y_orig'`` column overwritten by the
        model output multiplied by the ``'n'`` column.
    """
    # Reference side: structures together with their DFT labels.
    reference_pool = pool_from(test_structures, ref_energies,
                               ref_forces, ref_stresses)
    _, df_orig = convert_docs(reference_pool)

    # Prediction side: a structures-only pool provides the table layout.
    structures_only_pool = pool_from(test_structures)
    _, df_predict = convert_docs(structures_only_pool)

    predicted = self.model.predict(inputs=test_structures, override=True)
    # The model output is scaled by the 'n' column before being stored
    # (train() divides the targets by the same column).
    df_predict['y_orig'] = df_predict['n'] * predicted

    return df_orig, df_predict
def train(self, train_structures, energies, forces, stresses=None, **kwargs):
    """
    Fit the model on the supplied training data.

    Args:
        train_structures ([Structure]): The list of Pymatgen Structure
            objects.
        energies ([float]): List of total energies of each structure in
            structures list.
        forces ([np.array]): List of (m, 3) forces array of each structure
            with m atoms in structures list. m can be varied with each
            single structure case.
        stresses (list): List of (6, ) virial stresses of each structure
            in structures list.
        **kwargs: Passed through unchanged to ``self.model.fit``.
    """
    training_pool = pool_from(train_structures, energies, forces, stresses)
    _, training_df = convert_docs(training_pool)

    # Targets are the 'y_orig' values divided by the 'n' column.
    targets = training_df['y_orig'] / training_df['n']
    self.model.fit(inputs=train_structures, outputs=targets, **kwargs)

    # Record the first symbol of the first training structure as the specie.
    first_symbol = train_structures[0].symbol_set[0]
    self.specie = Element(first_symbol)
def test_convert_docs(self):
    """
    Check that convert_docs reproduces the fixture energies, forces and
    (when requested) stresses in the 'y_orig' column.
    """

    def y_orig_for(frame, dtype):
        # Rows of the requested dtype, restricted to the 'y_orig' column.
        return frame[frame['dtype'] == dtype]['y_orig']

    # First pass without stresses, second pass with them.
    for with_stress in (False, True):
        _, frame = convert_docs(self.test_pool, include_stress=with_stress)

        energies = y_orig_for(frame, 'energy')
        self.assertFalse(np.any(energies - self.test_energies))

        forces = y_orig_for(frame, 'force')
        expected_forces = np.array(self.test_forces).ravel()
        for actual, expected in zip(forces, expected_forces):
            self.assertEqual(actual, expected)

        if with_stress:
            stresses = y_orig_for(frame, 'stress')
            expected_stresses = np.array(self.test_stresses).ravel()
            for actual, expected in zip(stresses, expected_stresses):
                self.assertEqual(actual, expected)
def read_cfgs(self, filename, symbol):
    """
    Parse a configuration file made of BEGIN_CFG ... END_CFG blocks.

    Args:
        filename (str): The configuration file to be read.
        symbol (str): The element symbol assigned to every site.

    Returns:
        tuple: ``(data_pool, df)``. ``data_pool`` is a list with one dict
        per block, with keys ``'outputs'`` (``'energy'``, ``'forces'``,
        ``'virial_stress'``), ``'structure'`` and ``'num_atoms'``. ``df``
        is the second item returned by ``convert_docs(docs=data_pool)``.
    """

    def to_floats(text):
        # Whitespace-separated numbers -> list of floats.
        return [float(token) for token in text.split()]

    def to_table(text):
        # One row of floats per newline-separated line.
        return np.array([to_floats(row) for row in text.split('\n')])

    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
    size_pattern = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
    lattice_pattern = re.compile('SuperCell\n(.*?)\n AtomData', re.S | re.I)
    position_pattern = re.compile('fz\n(.*?)\n Energy', re.S)
    energy_pattern = re.compile('Energy\n(.*?)\n (?=PlusStress|Stress)', re.S)
    stress_pattern = re.compile('xy\n(.*?)(?=\n|$)', re.S)

    data_pool = []
    for block in block_pattern.findall(lines):
        size = int(size_pattern.findall(block)[0].lstrip())
        lattice = Lattice(to_table(lattice_pattern.findall(block)[0]))

        # Columns 2-4 of the atom table are used as Cartesian coordinates
        # and columns 5-7 as forces.
        atom_table = to_table(position_pattern.findall(block)[0])
        forces = atom_table[:, 5:8].tolist()
        position = atom_table[:, 2:5]

        energy = float(energy_pattern.findall(block)[0].lstrip())

        stress_tokens = stress_pattern.findall(block)[0].split()
        file_order_stress = np.array(
            [to_floats(token) for token in stress_tokens]).reshape(6, ).tolist()
        # Reorder from self.mtp_stress_order to self.vasp_stress_order.
        virial_stress = [
            file_order_stress[self.mtp_stress_order.index(n)]
            for n in self.vasp_stress_order
        ]

        struct = Structure(lattice=lattice, species=[symbol] * size,
                           coords=position, coords_are_cartesian=True)
        assert size == struct.num_sites

        data_pool.append({
            'outputs': {
                'energy': energy,
                'forces': forces,
                'virial_stress': virial_stress,
            },
            'structure': struct.as_dict(),
            'num_atoms': size,
        })

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename='output.data'):
    """
    Read the configuration file.

    The file is split into ``begin ... end`` blocks; within each block the
    ``lattice``, ``atom`` and ``energy`` lines are parsed.

    Args:
        filename (str): The configuration file to be read.

    Returns:
        tuple: ``(data_pool, df)``. ``data_pool`` is a list with one dict
        per block, with keys ``'outputs'`` (``'energy'``, ``'forces'``),
        ``'structure'`` and ``'num_atoms'``. ``df`` is the second item
        returned by ``convert_docs(docs=data_pool)``.
    """
    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('begin\n(.*?)end', re.S)
    lattice_pattern = re.compile('lattice(.*?)\n')
    position_pattern = re.compile('atom(.*?)\n')
    energy_pattern = re.compile('energy(.*?)\n')

    for block in block_pattern.findall(lines):
        d = {'outputs': {}}

        # dtype=float replaces np.float: that alias of the builtin float
        # was removed in NumPy 1.24, so np.float raises AttributeError on
        # current NumPy. The resulting dtype is unchanged.
        lattice_str = lattice_pattern.findall(block)
        lattice_rows = np.array([latt.split() for latt in lattice_str],
                                dtype=float)
        # Lengths are scaled by self.bohr_to_angstrom (presumably
        # Bohr -> Angstrom; confirm against the class constants).
        lattice = Lattice(lattice_rows * self.bohr_to_angstrom)

        position_str = position_pattern.findall(block)
        positions = pd.DataFrame([pos.split() for pos in position_str])
        positions.columns = \
            ['x', 'y', 'z', 'specie', 'charge', 'atomic_energy',
             'fx', 'fy', 'fz']

        coords = np.array(positions.loc[:, ['x', 'y', 'z']], dtype=float)
        coords = coords * self.bohr_to_angstrom
        species = np.array(positions['specie'])

        forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']], dtype=float)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        energy_str = energy_pattern.findall(block)[0]
        energy = float(energy_str.lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species,
                           coords=coords, coords_are_cartesian=True)

        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        d['outputs']['forces'] = forces
        d['num_atoms'] = len(struct)
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename, predict=False):
    """
    Read the configuration file.

    Args:
        filename (str): The configuration file to be read.
        predict (bool): If True, forces are taken from the ``'force'``
            property column; otherwise from ``'dft_force'``.

    Returns:
        tuple: ``(data_pool, df)``. ``data_pool`` is a list with one dict
        per configuration block, with keys ``'outputs'`` (``'energy'``,
        ``'forces'``, ``'virial_stress'``), ``'structure'`` and
        ``'num_atoms'``. ``df`` is the second item returned by
        ``convert_docs(docs=data_pool)``.
    """
    # Builtin int / str replace np.int / np.str: those aliases of the
    # builtins were removed in NumPy 1.24 and now raise AttributeError.
    type_convert = {'R': np.float32, 'I': int, 'S': str}
    data_pool = []

    with zopen(filename, 'rt') as f:
        lines = f.read()
    # Strip every 'AT ' marker before parsing.
    repl = re.compile('AT ')
    lines = repl.sub('', string=lines)

    # A block is a line holding only an integer (the size) followed by
    # everything up to the next such line or the end of the text.
    block_pattern = re.compile(
        r'(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
    lattice_pattern = re.compile(r'Lattice="(.+)"')
    # NOTE(review): the '.' between the digit groups is unescaped, so it
    # matches any character, not only a decimal point. Left unchanged.
    energy_pattern = re.compile(
        r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
    stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
    properties_pattern = re.compile(r'properties=(\S+)', re.I)
    position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

    for (size, block) in block_pattern.findall(lines):
        d = {'outputs': {}}
        size = int(size)

        lattice_str = lattice_pattern.findall(block)[0]
        lattice = Lattice([float(s) for s in lattice_str.split()])

        # The last energy match in the block is the one used.
        energy = float(energy_pattern.findall(block)[-1])

        # Group index 1 holds the numbers between the optional braces.
        stress_str = stress_pattern.findall(block)[0][1]
        virial_stress = np.array([float(s) for s in stress_str.split()])
        # Select six components from the parsed values (presumably a
        # flattened 3x3 tensor; confirm against the file format).
        virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]

        # 'properties' is a colon-separated list of (name, type, width)
        # triples describing the per-atom columns.
        properties = properties_pattern.findall(block)[0].split(":")
        labels_columns = OrderedDict()
        for i in range(0, len(properties), 3):
            labels_columns[properties[i]] = [
                int(properties[i + 2]), properties[i + 1]
            ]

        position_str = position_pattern.findall(block)[0].split('\n')
        position = np.array([p.split() for p in position_str])

        # Slice the per-atom table into named, typed column groups.
        labels = {}
        column_index = 0
        for key, (num_columns, dtype) in labels_columns.items():
            column_slice = position[:, column_index:column_index + num_columns]
            labels[key] = column_slice.astype(type_convert[dtype])
            column_index += num_columns

        struct = Structure(lattice=lattice,
                           species=labels['species'].ravel(),
                           coords=labels['pos'],
                           coords_are_cartesian=True)

        if predict:
            forces = labels['force']
        else:
            forces = labels['dft_force']

        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        assert size == struct.num_sites
        d['num_atoms'] = size
        d['outputs']['forces'] = forces
        d['outputs']['virial_stress'] = virial_stress
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df