Esempio n. 1
0
    def evaluate(self, test_structures, ref_energies, ref_forces,
                 ref_stresses):
        """
        Compare the trained model's predictions with reference data.

        Two tables are built through ``pool_from`` / ``convert_docs``: one
        from the structures together with their reference values, and one
        from the structures alone whose ``'y_orig'`` column is overwritten
        with the model output multiplied by the ``'n'`` column.

        Args:
            test_structures ([Structure]): List of Pymatgen Structure Objects.
            ref_energies ([float]): List of DFT-calculated total energies of
                each structure in structures list.
            ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
                each structure with m atoms in structures list. m can be varied
                with each single structure case.
            ref_stresses (list): List of DFT-calculated (6, ) virial stresses
                of each structure in structures list.

        Returns:
            tuple: ``(df_orig, df_predict)`` -- the reference table and the
            prediction table.
        """
        # Table holding the reference values.
        reference_pool = pool_from(test_structures, ref_energies, ref_forces,
                                   ref_stresses)
        _, df_orig = convert_docs(reference_pool)

        # Table built from the structures only; it is filled with the
        # model output below.
        structures_only_pool = pool_from(test_structures)
        _, df_predict = convert_docs(structures_only_pool)

        predictions = self.model.predict(inputs=test_structures,
                                         override=True)
        # NOTE(review): 'n' is presumably the atom count, turning a
        # per-atom prediction back into a total -- confirm against
        # convert_docs.
        df_predict['y_orig'] = df_predict['n'] * predictions

        return df_orig, df_predict
Esempio n. 2
0
    def train(self,
              train_structures,
              energies,
              forces,
              stresses=None,
              **kwargs):
        """
        Fit the model on the given structures.

        The training targets are the ``'y_orig'`` column divided by the
        ``'n'`` column of the table that ``convert_docs`` builds from the
        training pool. After fitting, ``self.specie`` is set from the first
        symbol of the first training structure.

        Args:
            train_structures ([Structure]): The list of Pymatgen Structure object.
            energies ([float]): List of total energies of each structure in
                structures list.
            forces ([np.array]): List of (m, 3) forces array of each structure
                with m atoms in structures list. m can be varied with each
                single structure case.
            stresses (list): List of (6, ) virial stresses of each
                structure in structures list.
            **kwargs: Extra keyword arguments forwarded to ``self.model.fit``.
        """
        pool = pool_from(train_structures, energies, forces, stresses)
        _, table = convert_docs(pool)

        # NOTE(review): 'n' is presumably the atom count, making the
        # targets per-atom quantities -- confirm against convert_docs.
        targets = table['y_orig'] / table['n']
        self.model.fit(inputs=train_structures, outputs=targets, **kwargs)

        # Only the first symbol of the first structure is recorded.
        first_symbol = train_structures[0].symbol_set[0]
        self.specie = Element(first_symbol)
Esempio n. 3
0
    def test_convert_docs(self):
        """
        Check that convert_docs reproduces the fixture energies, forces and
        (when requested) stresses, both with and without stress included.
        """
        for with_stress in (False, True):
            _, df = convert_docs(self.test_pool, include_stress=with_stress)

            # Energies: every difference to the fixture must be zero.
            energy_column = df[df['dtype'] == 'energy']['y_orig']
            self.assertFalse(np.any(energy_column - self.test_energies))

            # Forces: compared element-wise against the flattened fixture.
            force_column = df[df['dtype'] == 'force']['y_orig']
            expected_forces = np.array(self.test_forces).ravel()
            for actual, expected in zip(force_column, expected_forces):
                self.assertEqual(actual, expected)

            if not with_stress:
                continue

            # Stresses are only checked when they were requested.
            stress_column = df[df['dtype'] == 'stress']['y_orig']
            expected_stresses = np.array(self.test_stresses).ravel()
            for actual, expected in zip(stress_column, expected_stresses):
                self.assertEqual(actual, expected)
Esempio n. 4
0
    def read_cfgs(self, filename, symbol):
        """
        Parse a configuration file made of ``BEGIN_CFG`` ... ``END_CFG``
        blocks into a list of documents.

        Each block yields the structure, its energy, per-atom forces and a
        virial stress reordered from ``self.mtp_stress_order`` to
        ``self.vasp_stress_order``.

        Args:
            filename (str): The configuration file to be read.
            symbol (str): The element symbol, assigned to every site.

        Returns:
            tuple: ``(data_pool, df)`` -- the list of parsed documents and
            the second item returned by ``convert_docs`` for them.
        """
        def _to_floats(text):
            # Split on whitespace and convert every token to float.
            return [float(token) for token in text.split()]

        with zopen(filename, 'rt') as f:
            content = f.read()

        block_pattern = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
        size_pattern = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
        lattice_pattern = re.compile('SuperCell\n(.*?)\n AtomData',
                                     re.S | re.I)
        position_pattern = re.compile('fz\n(.*?)\n Energy', re.S)
        energy_pattern = re.compile('Energy\n(.*?)\n (?=PlusStress|Stress)',
                                    re.S)
        stress_pattern = re.compile('xy\n(.*?)(?=\n|$)', re.S)

        data_pool = []
        for cfg in block_pattern.findall(content):
            size = int(size_pattern.findall(cfg)[0].lstrip())

            lattice_rows = lattice_pattern.findall(cfg)[0].split('\n')
            lattice = Lattice(np.array([_to_floats(row)
                                        for row in lattice_rows]))

            atom_rows = position_pattern.findall(cfg)[0].split('\n')
            atom_table = np.array([_to_floats(row) for row in atom_rows])
            # Columns 5-7 are taken as forces, columns 2-4 as coordinates.
            forces = atom_table[:, 5:8].tolist()
            coords = atom_table[:, 2:5]

            energy = float(energy_pattern.findall(cfg)[0].lstrip())

            stress_tokens = stress_pattern.findall(cfg)[0].split()
            # reshape(6) enforces that exactly six components were read.
            mtp_stress = np.array([float(token) for token in stress_tokens
                                   ]).reshape(6).tolist()
            virial_stress = [
                mtp_stress[self.mtp_stress_order.index(component)]
                for component in self.vasp_stress_order
            ]

            struct = Structure(lattice=lattice,
                               species=[symbol] * size,
                               coords=coords,
                               coords_are_cartesian=True)
            struct_dict = struct.as_dict()
            assert size == struct.num_sites

            data_pool.append({
                'outputs': {
                    'energy': energy,
                    'forces': forces,
                    'virial_stress': virial_stress,
                },
                'structure': struct_dict,
                'num_atoms': size,
            })

        _, df = convert_docs(docs=data_pool)
        return data_pool, df
Esempio n. 5
0
    def read_cfgs(self, filename='output.data'):
        """
        Read the configuration file.

        Every ``begin`` ... ``end`` block is parsed into one document with
        the structure, its energy and per-atom forces. Lattice vectors and
        coordinates are multiplied by ``self.bohr_to_angstrom``; the energy
        is divided by ``self.eV_to_Ha`` and the forces by both
        ``self.eV_to_Ha`` and ``self.bohr_to_angstrom``.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            tuple: ``(data_pool, df)`` -- the list of parsed documents and
            the second item returned by ``convert_docs`` for them.
        """
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()

        block_pattern = re.compile('begin\n(.*?)end', re.S)
        lattice_pattern = re.compile('lattice(.*?)\n')
        position_pattern = re.compile('atom(.*?)\n')
        energy_pattern = re.compile('energy(.*?)\n')

        for block in block_pattern.findall(lines):
            d = {'outputs': {}}
            lattice_str = lattice_pattern.findall(block)
            # The builtin ``float`` is used instead of the ``np.float``
            # alias, which was removed in NumPy 1.24; the two were identical.
            lattice = Lattice(np.array([latt.split() for latt in lattice_str],
                                       dtype=float) * self.bohr_to_angstrom)
            position_str = position_pattern.findall(block)
            # One row per ``atom`` line, split on whitespace into the nine
            # columns named below.
            positions = pd.DataFrame([pos.split() for pos in position_str])
            positions.columns = \
                ['x', 'y', 'z', 'specie', 'charge', 'atomic_energy', 'fx', 'fy', 'fz']
            coords = np.array(positions.loc[:, ['x', 'y', 'z']], dtype=float)
            coords = coords * self.bohr_to_angstrom
            species = np.array(positions['specie'])
            forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']], dtype=float)
            forces = forces / self.eV_to_Ha / self.bohr_to_angstrom
            # Only the first ``energy`` match of the block is used.
            energy_str = energy_pattern.findall(block)[0]
            energy = float(energy_str.lstrip()) / self.eV_to_Ha
            struct = Structure(lattice=lattice, species=species, coords=coords,
                               coords_are_cartesian=True)
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            d['outputs']['forces'] = forces
            d['num_atoms'] = len(struct)

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df
Esempio n. 6
0
    def read_cfgs(self, filename, predict=False):
        """
        Read the configuration file.

        The file is split into blocks, each introduced by a line holding
        only the number of atoms. From every block the lattice, energy,
        virial stress and the per-atom columns described by its
        ``properties=`` entry are extracted.

        Args:
            filename (str): The configuration file to be read.
            predict (bool): If True, forces are taken from the ``force``
                column; otherwise from the ``dft_force`` column.

        Returns:
            tuple: ``(data_pool, df)`` -- the list of parsed documents and
            the second item returned by ``convert_docs`` for them.
        """
        # Builtin ``int`` / ``str`` replace the ``np.int`` / ``np.str``
        # aliases, which were removed in NumPy 1.24; they were identical.
        type_convert = {'R': np.float32, 'I': int, 'S': str}
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()
        # Drop every "AT " occurrence before parsing.
        repl = re.compile('AT ')
        lines = repl.sub('', string=lines)

        block_pattern = re.compile(
            r'(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
        lattice_pattern = re.compile(r'Lattice="(.+)"')
        # NOTE(review): the '.' between the digit groups is unescaped and
        # matches any character; kept as-is to preserve existing matches.
        energy_pattern = re.compile(
            r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
        stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
        properties_pattern = re.compile(r'properties=(\S+)', re.I)
        position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

        for (size, block) in block_pattern.findall(lines):
            d = {'outputs': {}}
            # int() tolerates the newlines captured around the atom count.
            size = int(size)
            lattice_str = lattice_pattern.findall(block)[0]
            lattice = Lattice(list(map(float, lattice_str.split())))
            # The last energy match of the block is the one used.
            energy_str = energy_pattern.findall(block)[-1]
            energy = float(energy_str)
            # Group 1 of the match is the text between the optional braces.
            stress_str = stress_pattern.findall(block)[0][1]
            virial_stress = np.array(list(map(float, stress_str.split())))
            # Pick six of the parsed components.
            # NOTE(review): presumably xx, yy, zz, xy, yz, zx of a
            # row-major 3x3 tensor -- confirm.
            virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]
            # ``properties`` is a colon-separated list of
            # name:type:column-count triples.
            properties = properties_pattern.findall(block)[0].split(":")
            labels_columns = OrderedDict()
            labels = {}
            for i in range(0, len(properties), 3):
                labels_columns[properties[i]] = [
                    int(properties[i + 2]), properties[i + 1]
                ]
            position_str = position_pattern.findall(block)[0].split('\n')
            position = np.array([p.split() for p in position_str])
            # Slice the per-atom table column-wise in the declared order,
            # converting each slice to its declared type.
            column_index = 0
            for key in labels_columns:
                num_columns, dtype = labels_columns[key]
                labels[key] = position[:, column_index:column_index +
                                       num_columns].astype(type_convert[dtype])
                column_index += num_columns
            struct = Structure(lattice=lattice,
                               species=labels['species'].ravel(),
                               coords=labels['pos'],
                               coords_are_cartesian=True)
            if predict:
                forces = labels['force']
            else:
                forces = labels['dft_force']
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            assert size == struct.num_sites
            d['num_atoms'] = size
            d['outputs']['forces'] = forces
            d['outputs']['virial_stress'] = virial_stress

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df