def evaluate(self, test_structures, test_energies, test_forces, test_stresses=None):
    """
    Evaluate energies, forces and stresses of structures with the trained
    machine learning potential.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        test_energies ([float]): List of DFT-calculated total energies, one
            per structure in test_structures.
        test_forces ([np.array]): List of DFT-calculated (m, 3) force arrays,
            one per structure; m (the number of atoms) may differ from
            structure to structure.
        test_stresses (list): List of DFT-calculated (6, ) virial stresses,
            one per structure. Defaults to None.

    Returns:
        (df_orig, df_predict): the second item returned by convert_docs for
        the labelled pool and for the structure-only pool respectively. In
        df_predict the 'y_orig' column is replaced by the model outputs
        multiplied by the 'n' column.
    """
    # Reference frame: structures together with their DFT labels.
    reference_pool = pool_from(test_structures, test_energies,
                               test_forces, test_stresses)
    _, df_orig = convert_docs(reference_pool)

    # Structure-only frame; it only serves as a container for the predictions.
    bare_pool = pool_from(test_structures)
    _, df_predict = convert_docs(bare_pool)

    # NOTE(review): the scaling by 'n' suggests the model predicts per-atom
    # values (train() divides by 'n') -- confirm against the model in use.
    predictions = self.model.predict_objs(objs=test_structures)
    df_predict['y_orig'] = df_predict['n'] * predictions
    return df_orig, df_predict
def evaluate(self, test_structures, test_energies, test_forces, test_stresses=None, include_stress=False):
    """
    Evaluate energies, forces and stresses of structures with the trained
    machine learning potential.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        test_energies ([float]): List of DFT-calculated total energies, one
            per structure in test_structures.
        test_forces ([np.array]): List of DFT-calculated (m, 3) force arrays,
            one per structure; m (the number of atoms) may differ from
            structure to structure.
        test_stresses (list): List of DFT-calculated (6, ) virial stresses,
            one per structure. Defaults to None.
        include_stress (bool): Whether to include stress components; passed
            through to convert_docs.

    Returns:
        (df_orig, df_predict): the second item returned by convert_docs for
        the labelled pool and for the structure-only pool respectively. In
        df_predict the "y_orig" column is replaced by the model outputs
        multiplied by the "n" column.
    """
    # Let the shared checker normalise the inputs before any pooling happens.
    checked = check_structures_forces_stresses(
        test_structures, test_forces, test_stresses)
    test_structures, test_forces, test_stresses = checked

    # Reference frame: structures together with their DFT labels.
    reference_pool = pool_from(test_structures, test_energies,
                               test_forces, test_stresses)
    _, df_orig = convert_docs(reference_pool, include_stress=include_stress)

    # Structure-only frame; it only serves as a container for the predictions.
    bare_pool = pool_from(test_structures)
    _, df_predict = convert_docs(bare_pool, include_stress=include_stress)

    # NOTE(review): the scaling by "n" suggests the model predicts per-atom
    # values (train() divides by "n") -- confirm against the model in use.
    predictions = self.model.predict_objs(objs=test_structures)
    df_predict["y_orig"] = df_predict["n"] * predictions
    return df_orig, df_predict
def train(self, train_structures, train_energies, train_forces, train_stresses=None, include_stress=False, **kwargs):
    """
    Fit the underlying model on the given training data.

    Args:
        train_structures ([Structure]): List of Pymatgen Structure objects.
        train_energies ([float]): List of total energies, one per structure
            in train_structures.
        train_forces ([np.array]): List of (m, 3) force arrays, one per
            structure; m (the number of atoms) may differ from structure to
            structure.
        train_stresses (list): List of (6, ) virial stresses, one per
            structure. Defaults to None.
        include_stress (bool): Whether to include stress components; passed
            through to convert_docs.
        **kwargs: Forwarded unchanged to self.model.fit.
    """
    # Let the shared checker normalise the inputs before pooling.
    checked = check_structures_forces_stresses(
        train_structures, train_forces, train_stresses)
    train_structures, train_forces, train_stresses = checked

    pool = pool_from(train_structures, train_energies,
                     train_forces, train_stresses)
    _, frame = convert_docs(pool, include_stress=include_stress)

    # Targets are the "y_orig" values normalised by the "n" column.
    targets = frame["y_orig"] / frame["n"]
    features = self.model.describer.transform(train_structures)
    self.model.fit(features=features, targets=targets, **kwargs)
def train(self, train_structures, train_energies, train_forces, train_stresses=None, **kwargs):
    """
    Fit the underlying model on the given training data.

    Args:
        train_structures ([Structure]): List of Pymatgen Structure objects.
        train_energies ([float]): List of total energies, one per structure
            in train_structures.
        train_forces ([np.array]): List of (m, 3) force arrays, one per
            structure; m (the number of atoms) may differ from structure to
            structure.
        train_stresses (list): List of (6, ) virial stresses, one per
            structure. Defaults to None.
        **kwargs: Forwarded unchanged to self.model.fit.
    """
    pool = pool_from(train_structures, train_energies,
                     train_forces, train_stresses)
    _, frame = convert_docs(pool)

    # Targets are the 'y_orig' values normalised by the 'n' column.
    targets = frame['y_orig'] / frame['n']
    features = self.model.describer.transform(train_structures)
    self.model.fit(features=features, targets=targets, **kwargs)
def read_cfgs(self, filename="output.data"):
    """
    Parse a configuration file made of ``begin`` ... ``end`` blocks.

    Every block contributes one entry holding the structure (as a dict), its
    energy, its forces and its atom count. Lengths are multiplied by
    self.bohr_to_angstrom and energies divided by self.eV_to_Ha (presumably
    Bohr -> angstrom and Hartree -> eV, judging by the attribute names).

    Args:
        filename (str): The configuration file to be read.

    Returns:
        (data_pool, df): the list of per-block dicts and the second item
        returned by convert_docs(docs=data_pool).
    """
    data_pool = []
    with zopen(filename, "rt") as f:
        text = f.read()

    block_re = re.compile("begin\n(.*?)end", re.S)
    lattice_re = re.compile("lattice(.*?)\n")
    atom_re = re.compile("atom(.*?)\n")
    energy_re = re.compile("energy(.*?)\n")

    for block in block_re.findall(text):
        # One "lattice" line per cell vector.
        lattice_rows = [row.split() for row in lattice_re.findall(block)]
        cell = np.array(lattice_rows, dtype=np.float64) * self.bohr_to_angstrom
        lattice = Lattice(cell)

        # One "atom" line per site: position, species, charge, energy, force.
        atoms = pd.DataFrame([row.split() for row in atom_re.findall(block)])
        atoms.columns = [
            "x", "y", "z", "specie", "charge", "atomic_energy", "fx", "fy", "fz"
        ]
        coords = np.array(atoms.loc[:, ["x", "y", "z"]], dtype=np.float64)
        coords = coords * self.bohr_to_angstrom
        species = np.array(atoms["specie"])
        forces = np.array(atoms.loc[:, ["fx", "fy", "fz"]], dtype=np.float64)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        # Only the first "energy" line of the block is used.
        raw_energy = energy_re.findall(block)[0]
        energy = float(raw_energy.lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species, coords=coords,
                           coords_are_cartesian=True)
        data_pool.append({
            "outputs": {"energy": energy, "forces": forces},
            "structure": struct.as_dict(),
            "num_atoms": len(struct),
        })

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename='output.data'):
    """
    Read a configuration file made of ``begin`` ... ``end`` blocks.

    Every block contributes one entry holding the structure (as a dict), its
    energy, its forces and its atom count. Lengths are multiplied by
    self.bohr_to_angstrom and energies divided by self.eV_to_Ha (presumably
    Bohr -> angstrom and Hartree -> eV, judging by the attribute names).

    Args:
        filename (str): The configuration file to be read.

    Returns:
        (data_pool, df): the list of per-block dicts and the second item
        returned by convert_docs(docs=data_pool).
    """
    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('begin\n(.*?)end', re.S)
    lattice_pattern = re.compile('lattice(.*?)\n')
    position_pattern = re.compile('atom(.*?)\n')
    energy_pattern = re.compile('energy(.*?)\n')

    for block in block_pattern.findall(lines):
        d = {'outputs': {}}

        # np.float64 replaces the np.float alias, which was removed in
        # NumPy 1.24; the resulting dtype is the same.
        lattice_str = lattice_pattern.findall(block)
        lattice = Lattice(
            np.array([latt.split() for latt in lattice_str], dtype=np.float64)
            * self.bohr_to_angstrom)

        # One "atom" line per site: position, species, charge, energy, force.
        position_str = position_pattern.findall(block)
        positions = pd.DataFrame([pos.split() for pos in position_str])
        positions.columns = \
            ['x', 'y', 'z', 'specie', 'charge', 'atomic_energy', 'fx', 'fy', 'fz']

        coords = np.array(positions.loc[:, ['x', 'y', 'z']], dtype=np.float64)
        coords = coords * self.bohr_to_angstrom
        species = np.array(positions['specie'])
        forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']], dtype=np.float64)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        # Only the first "energy" line of the block is used.
        energy_str = energy_pattern.findall(block)[0]
        energy = float(energy_str.lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species, coords=coords,
                           coords_are_cartesian=True)
        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        d['outputs']['forces'] = forces
        d['num_atoms'] = len(struct)
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(filename, predict=False):
    """
    Read the configuration file.

    The file is split into blocks that each start with a line holding only
    the number of atoms. From every block the lattice, energy, virial stress
    and the per-atom columns announced by its ``properties=`` field are
    extracted (presumably an extended-XYZ style file -- confirm with the
    producer of these files).

    Args:
        filename (str): The configuration file to be read.
        predict (bool): If True, forces are taken from the ``force`` column;
            otherwise from the ``dft_force`` column.

    Returns:
        (data_pool, df): the list of per-block dicts and the second item
        returned by convert_docs(docs=data_pool).
    """
    # Builtin int/str replace the np.int/np.str aliases, which were removed
    # in NumPy 1.24; they were plain aliases of the builtins.
    type_convert = {'R': np.float32, 'I': int, 'S': str}
    data_pool = []

    with zopen(filename, 'rt') as f:
        lines = f.read()

    # Strip every "AT " marker before matching anything else.
    repl = re.compile('AT ')
    lines = repl.sub('', string=lines)

    block_pattern = re.compile(
        r'(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
    lattice_pattern = re.compile(r'Lattice="(.+)"')
    energy_pattern = re.compile(
        r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
    stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
    properties_pattern = re.compile(r'properties=(\S+)', re.I)
    position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

    for (size, block) in block_pattern.findall(lines):
        d = {'outputs': {}}
        size = int(size)

        lattice_str = lattice_pattern.findall(block)[0]
        lattice = Lattice([float(s) for s in lattice_str.split()])

        # The last energy match in the block is the one that is kept.
        energy_str = energy_pattern.findall(block)[-1]
        energy = float(energy_str)

        # Group 1 of the stress match is the text between the optional braces.
        stress_str = stress_pattern.findall(block)[0][1]
        virial_stress = np.array([float(s) for s in stress_str.split()])
        # Keep components 0, 4, 8, 1, 5, 6 of the flattened virial
        # (presumably xx, yy, zz, xy, yz, zx of a 3x3 tensor -- confirm).
        virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]

        # "properties" is a colon-separated run of name:type:ncols triples.
        properties = properties_pattern.findall(block)[0].split(":")
        labels_columns = OrderedDict()
        for i in range(0, len(properties), 3):
            labels_columns[properties[i]] = [
                int(properties[i + 2]), properties[i + 1]
            ]

        position_str = position_pattern.findall(block)[0].split('\n')
        position = np.array([p.split() for p in position_str])

        # Slice the per-atom table column-wise in the declared order and cast
        # each slice to the declared type.
        labels = {}
        column_index = 0
        for key, (num_columns, dtype) in labels_columns.items():
            labels[key] = position[
                :, column_index:column_index + num_columns
            ].astype(type_convert[dtype])
            column_index += num_columns

        struct = Structure(lattice=lattice,
                           species=labels['species'].ravel(),
                           coords=labels['pos'],
                           coords_are_cartesian=True)
        if predict:
            forces = labels['force']
        else:
            forces = labels['dft_force']

        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        # The leading atom count must agree with the parsed sites.
        assert size == struct.num_sites
        d['num_atoms'] = size
        d['outputs']['forces'] = forces
        d['outputs']['virial_stress'] = virial_stress
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename):
    """
    Read a configuration file made of ``BEGIN_CFG`` ... ``END_CFG`` blocks.

    From every block the size, supercell, per-atom table, energy and stress
    are extracted. The type index in column 1 of the per-atom table is mapped
    to a species through self.elements, and the stress components are
    reordered from self.mtp_stress_order to self.vasp_stress_order.

    Args:
        filename (str): The configuration file to be read.

    Returns:
        (data_pool, df): the list of per-block dicts and the second item
        returned by convert_docs(docs=data_pool).

    Raises:
        ValueError: If self.elements is empty.
    """

    def formatify(string):
        # Whitespace-separated numbers -> list of floats.
        return [float(s) for s in string.split()]

    if not self.elements:
        raise ValueError("No species given.")

    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
    size_pattern = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
    lattice_pattern = re.compile('SuperCell\n(.*?)\n AtomData', re.S | re.I)
    position_pattern = re.compile('fz\n(.*?)\n Energy', re.S)
    energy_pattern = re.compile('Energy\n(.*?)\n (?=PlusStress|Stress)', re.S)
    stress_pattern = re.compile('xy\n(.*?)(?=\n|$)', re.S)

    for block in block_pattern.findall(lines):
        d = {'outputs': {}}

        size_str = size_pattern.findall(block)[0]
        size = int(size_str.lstrip())

        lattice_str = lattice_pattern.findall(block)[0]
        lattice = Lattice(
            np.array(list(map(formatify, lattice_str.split('\n')))))

        # Per-atom table: column 1 is the type index, 2-4 the coordinates
        # and 5-7 the forces.
        position_str = position_pattern.findall(block)[0]
        position = np.array(list(map(formatify, position_str.split('\n'))))
        # Builtin int replaces the np.int alias, which was removed in
        # NumPy 1.24; it was a plain alias of the builtin.
        species = np.array(self.elements)[position[:, 1].astype(int)]
        forces = position[:, 5:8].tolist()
        position = position[:, 2:5]

        energy_str = energy_pattern.findall(block)[0]
        energy = float(energy_str.lstrip())

        # reshape(6, ) also enforces that exactly six components were read.
        stress_str = stress_pattern.findall(block)[0]
        virial_stress = np.array(
            list(map(formatify, stress_str.split()))).reshape(6, ).tolist()
        virial_stress = [
            virial_stress[self.mtp_stress_order.index(n)]
            for n in self.vasp_stress_order
        ]

        struct = Structure(lattice=lattice, species=species, coords=position,
                           coords_are_cartesian=True)
        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        # The declared size must agree with the parsed sites.
        assert size == struct.num_sites
        d['num_atoms'] = size
        d['outputs']['forces'] = forces
        d['outputs']['virial_stress'] = virial_stress
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df