Python convert_docs Examples, maml.utils.convert_docs Python Examples

Example #1

0

Show file

    def evaluate(self,
                 test_structures,
                 test_energies,
                 test_forces,
                 test_stresses=None):
        """
        Evaluate energies, forces and stresses of structures with the trained
        machine learning potential.

        Args:
            test_structures ([Structure]): List of Pymatgen Structure objects.
            test_energies ([float]): List of DFT-calculated total energies,
                one per structure in test_structures.
            test_forces ([np.array]): List of DFT-calculated (m, 3) force
                arrays, one per structure. m is the number of atoms and may
                differ from structure to structure.
            test_stresses (list): List of DFT-calculated (6, ) virial
                stresses, one per structure. Defaults to None.

        Returns:
            tuple: (df_orig, df_predict). df_orig is the second item returned
                by convert_docs for the reference data. df_predict is the
                second item returned by convert_docs for a pool built from
                the structures alone, with its 'y_orig' column replaced by
                the 'n' column multiplied by the model predictions.
        """
        # Reference table built from the supplied DFT data.
        reference_docs = pool_from(test_structures, test_energies,
                                   test_forces, test_stresses)
        _, df_orig = convert_docs(reference_docs)

        # Table built from the structures only; 'y_orig' is overwritten below.
        structure_docs = pool_from(test_structures)
        _, df_predict = convert_docs(structure_docs)

        predictions = self.model.predict_objs(objs=test_structures)
        # NOTE(review): 'n' looks like a per-row normalisation factor (train
        # divides 'y_orig' by it) - confirm against convert_docs.
        df_predict['y_orig'] = df_predict['n'] * predictions

        return df_orig, df_predict

Example #2

0

Show file

    def evaluate(self,
                 test_structures,
                 test_energies,
                 test_forces,
                 test_stresses=None,
                 include_stress=False):
        """
        Evaluate energies, forces and stresses of structures with the trained
        machine learning potential.

        Args:
            test_structures ([Structure]): List of Pymatgen Structure objects.
            test_energies ([float]): List of DFT-calculated total energies,
                one per structure in test_structures.
            test_forces ([np.array]): List of DFT-calculated (m, 3) force
                arrays, one per structure. m is the number of atoms and may
                differ from structure to structure.
            test_stresses (list): List of DFT-calculated (6, ) virial
                stresses, one per structure. Defaults to None.
            include_stress (bool): Whether to include stress components.
                Forwarded to convert_docs.

        Returns:
            tuple: (df_orig, df_predict). df_orig is the second item returned
                by convert_docs for the reference data. df_predict is the
                second item returned by convert_docs for a pool built from
                the structures alone, with its "y_orig" column replaced by
                the "n" column multiplied by the model predictions.
        """
        # Inputs are passed through the shared checker before pooling.
        checked = check_structures_forces_stresses(
            test_structures, test_forces, test_stresses)
        test_structures, test_forces, test_stresses = checked

        # Reference table built from the supplied DFT data.
        reference_docs = pool_from(test_structures, test_energies,
                                   test_forces, test_stresses)
        _, df_orig = convert_docs(reference_docs,
                                  include_stress=include_stress)

        # Table built from the structures only; "y_orig" is overwritten below.
        structure_docs = pool_from(test_structures)
        _, df_predict = convert_docs(structure_docs,
                                     include_stress=include_stress)

        predictions = self.model.predict_objs(objs=test_structures)
        # NOTE(review): "n" looks like a per-row normalisation factor (train
        # divides "y_orig" by it) - confirm against convert_docs.
        df_predict["y_orig"] = df_predict["n"] * predictions

        return df_orig, df_predict

Example #3

0

Show file

    def train(self,
              train_structures,
              train_energies,
              train_forces,
              train_stresses=None,
              include_stress=False,
              **kwargs):
        """
        Fit the underlying model on the given training data.

        Args:
            train_structures ([Structure]): List of Pymatgen Structure
                objects.
            train_energies ([float]): List of total energies, one per
                structure in train_structures.
            train_forces ([np.array]): List of (m, 3) force arrays, one per
                structure. m is the number of atoms and may differ from
                structure to structure.
            train_stresses (list): List of (6, ) virial stresses, one per
                structure. Defaults to None.
            include_stress (bool): Whether to include stress components.
                Forwarded to convert_docs.
            **kwargs: Extra keyword arguments passed on to self.model.fit.
        """
        # Inputs are passed through the shared checker before pooling.
        checked = check_structures_forces_stresses(
            train_structures, train_forces, train_stresses)
        train_structures, train_forces, train_stresses = checked

        docs = pool_from(train_structures, train_energies, train_forces,
                         train_stresses)
        _, table = convert_docs(docs, include_stress=include_stress)

        # Targets are the "y_orig" values divided by the "n" column.
        targets = table["y_orig"] / table["n"]
        features = self.model.describer.transform(train_structures)
        self.model.fit(features=features, targets=targets, **kwargs)

Example #4

0

Show file

    def train(self,
              train_structures,
              train_energies,
              train_forces,
              train_stresses=None,
              **kwargs):
        """
        Fit the underlying model on the given training data.

        Args:
            train_structures ([Structure]): List of Pymatgen Structure
                objects.
            train_energies ([float]): List of total energies, one per
                structure in train_structures.
            train_forces ([np.array]): List of (m, 3) force arrays, one per
                structure. m is the number of atoms and may differ from
                structure to structure.
            train_stresses (list): List of (6, ) virial stresses, one per
                structure. Defaults to None.
            **kwargs: Extra keyword arguments passed on to self.model.fit.
        """
        docs = pool_from(train_structures, train_energies, train_forces,
                         train_stresses)
        _, table = convert_docs(docs)

        # Targets are the 'y_orig' values divided by the 'n' column.
        targets = table['y_orig'] / table['n']
        features = self.model.describer.transform(train_structures)
        self.model.fit(features=features, targets=targets, **kwargs)

Example #5

0

Show file

    def read_cfgs(self, filename="output.data"):
        """
        Parse a configuration file into a list of documents and a dataframe.

        The file is split into "begin ... end" blocks. Within each block the
        "lattice", "atom" and "energy" lines are extracted. Lattice vectors
        and coordinates are multiplied by self.bohr_to_angstrom, the energy
        is divided by self.eV_to_Ha, and forces are divided by both
        self.eV_to_Ha and self.bohr_to_angstrom.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            tuple: (data_pool, df). data_pool is a list with one dict per
                block holding "structure", "outputs" ("energy" and "forces")
                and "num_atoms". df is the second item returned by
                convert_docs for that list.
        """
        with zopen(filename, "rt") as f:
            content = f.read()

        block_re = re.compile("begin\n(.*?)end", re.S)
        lattice_re = re.compile("lattice(.*?)\n")
        atom_re = re.compile("atom(.*?)\n")
        energy_re = re.compile("energy(.*?)\n")

        data_pool = []
        for block in block_re.findall(content):
            # One whitespace-separated row per "lattice" line.
            lattice_rows = [row.split() for row in lattice_re.findall(block)]
            lattice = Lattice(
                np.array(lattice_rows, dtype=np.float64) *
                self.bohr_to_angstrom)

            # One row per "atom" line; every row is expected to have the
            # nine fields named below.
            atoms = pd.DataFrame(
                [row.split() for row in atom_re.findall(block)])
            atoms.columns = [
                "x", "y", "z", "specie", "charge", "atomic_energy", "fx", "fy",
                "fz"
            ]

            coords = np.array(atoms.loc[:, ["x", "y", "z"]],
                              dtype=np.float64) * self.bohr_to_angstrom
            species = np.array(atoms["specie"])
            raw_forces = np.array(atoms.loc[:, ["fx", "fy", "fz"]],
                                  dtype=np.float64)
            forces = raw_forces / self.eV_to_Ha / self.bohr_to_angstrom

            # Only the first "energy" line of the block is used.
            first_energy = energy_re.findall(block)[0]
            energy = float(first_energy.lstrip()) / self.eV_to_Ha

            struct = Structure(lattice=lattice,
                               species=species,
                               coords=coords,
                               coords_are_cartesian=True)
            data_pool.append({
                "outputs": {"energy": energy, "forces": forces},
                "structure": struct.as_dict(),
                "num_atoms": len(struct),
            })

        _, df = convert_docs(docs=data_pool)
        return data_pool, df

Example #6

0

Show file

File: _nnp.py Project: zhenming-xu/maml

    def read_cfgs(self, filename='output.data'):
        """
        Read the configuration file.

        The file is split into 'begin ... end' blocks. Within each block the
        'lattice', 'atom' and 'energy' lines are extracted. Lattice vectors
        and coordinates are multiplied by self.bohr_to_angstrom, the energy
        is divided by self.eV_to_Ha, and forces are divided by both
        self.eV_to_Ha and self.bohr_to_angstrom.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            tuple: (data_pool, df). data_pool is a list with one dict per
                block holding 'structure', 'outputs' ('energy' and 'forces')
                and 'num_atoms'. df is the second item returned by
                convert_docs for that list.
        """
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()

        block_pattern = re.compile('begin\n(.*?)end', re.S)
        lattice_pattern = re.compile('lattice(.*?)\n')
        position_pattern = re.compile('atom(.*?)\n')
        energy_pattern = re.compile('energy(.*?)\n')

        for block in block_pattern.findall(lines):
            d = {'outputs': {}}
            lattice_str = lattice_pattern.findall(block)
            # np.float64 is used instead of the np.float alias, which was
            # deprecated in NumPy 1.20 and removed in NumPy 1.24.
            lattice = Lattice(
                np.array([latt.split() for latt in lattice_str],
                         dtype=np.float64) * self.bohr_to_angstrom)
            position_str = position_pattern.findall(block)
            positions = pd.DataFrame([pos.split() for pos in position_str])
            # Every 'atom' line is expected to have these nine fields.
            positions.columns = \
                ['x', 'y', 'z', 'specie', 'charge', 'atomic_energy', 'fx', 'fy', 'fz']
            coords = np.array(positions.loc[:, ['x', 'y', 'z']],
                              dtype=np.float64)
            coords = coords * self.bohr_to_angstrom
            species = np.array(positions['specie'])
            forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']],
                              dtype=np.float64)
            forces = forces / self.eV_to_Ha / self.bohr_to_angstrom
            # Only the first 'energy' line of the block is used.
            energy_str = energy_pattern.findall(block)[0]
            energy = float(energy_str.lstrip()) / self.eV_to_Ha
            struct = Structure(lattice=lattice,
                               species=species,
                               coords=coords,
                               coords_are_cartesian=True)
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            d['outputs']['forces'] = forces
            d['num_atoms'] = len(struct)

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df

Example #7

0

Show file

    def read_cfgs(filename, predict=False):
        """
        Read the configuration file.

        The file is split into blocks that each start with a line holding
        only an integer (the number of atoms). For every block the lattice,
        energy, virial stress, per-atom property layout ('properties=...')
        and per-atom rows are parsed.

        Args:
            filename (str): The configuration file to be read.
            predict (bool): If True, per-atom forces are taken from the
                'force' property column; otherwise from 'dft_force'.

        Returns:
            tuple: (data_pool, df). data_pool is a list with one dict per
                block holding 'structure', 'outputs' ('energy', 'forces' and
                'virial_stress') and 'num_atoms'. df is the second item
                returned by convert_docs for that list.
        """
        # Builtin int/str replace the np.int/np.str aliases, which were
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        type_convert = {'R': np.float32, 'I': int, 'S': str}
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()
        # Drop every literal 'AT ' occurrence before parsing.
        lines = lines.replace('AT ', '')

        block_pattern = re.compile(
            r'(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
        lattice_pattern = re.compile(r'Lattice="(.+)"')
        energy_pattern = re.compile(
            r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
        stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
        properties_pattern = re.compile(r'properties=(\S+)', re.I)
        position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

        for (size, block) in block_pattern.findall(lines):
            d = {'outputs': {}}
            # int() tolerates the newlines captured around the atom count.
            size = int(size)
            lattice_str = lattice_pattern.findall(block)[0]
            lattice = Lattice([float(s) for s in lattice_str.split()])
            # The last energy match in the block is the one used.
            energy_str = energy_pattern.findall(block)[-1]
            energy = float(energy_str)
            # Group 1 of the stress match holds the numbers between the
            # optional braces.
            stress_str = stress_pattern.findall(block)[0][1]
            virial_stress = np.array([float(s) for s in stress_str.split()])
            # Keep six of the parsed components, at flat indices
            # 0, 4, 8, 1, 5, 6.
            virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]

            # 'properties' is a colon-separated list of
            # name:type-code:column-count triples, in column order.
            properties = properties_pattern.findall(block)[0].split(":")
            labels_columns = OrderedDict()
            for i in range(0, len(properties), 3):
                labels_columns[properties[i]] = [
                    int(properties[i + 2]), properties[i + 1]
                ]

            position_str = position_pattern.findall(block)[0].split('\n')
            position = np.array([p.split() for p in position_str])

            # Slice the per-atom string table into one typed array per
            # property, walking the columns left to right.
            labels = {}
            column_index = 0
            for key, (num_columns, dtype) in labels_columns.items():
                columns = position[:, column_index:column_index + num_columns]
                labels[key] = columns.astype(type_convert[dtype])
                column_index += num_columns

            struct = Structure(lattice=lattice,
                               species=labels['species'].ravel(),
                               coords=labels['pos'],
                               coords_are_cartesian=True)
            forces = labels['force'] if predict else labels['dft_force']

            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            # The declared atom count must match the parsed structure.
            assert size == struct.num_sites
            d['num_atoms'] = size
            d['outputs']['forces'] = forces
            d['outputs']['virial_stress'] = virial_stress

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df

Example #8

0

Show file

File: _mtp.py Project: zhenming-xu/maml

    def read_cfgs(self, filename):
        """
        Read the configuration file.

        The file is split into 'BEGIN_CFG ... END_CFG' blocks. For every
        block the size, supercell, per-atom rows, energy and stress are
        parsed. Stress components are reordered from self.mtp_stress_order
        to self.vasp_stress_order.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            tuple: (data_pool, df). data_pool is a list with one dict per
                block holding 'structure', 'outputs' ('energy', 'forces' and
                'virial_stress') and 'num_atoms'. df is the second item
                returned by convert_docs for that list.

        Raises:
            ValueError: If self.elements is empty.
        """
        def formatify(string):
            # Parse a whitespace-separated string into a list of floats.
            return [float(s) for s in string.split()]

        if not self.elements:
            raise ValueError("No species given.")

        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()

        block_pattern = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
        size_pattern = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
        lattice_pattern = re.compile('SuperCell\n(.*?)\n AtomData',
                                     re.S | re.I)
        position_pattern = re.compile('fz\n(.*?)\n Energy', re.S)
        energy_pattern = re.compile('Energy\n(.*?)\n (?=PlusStress|Stress)',
                                    re.S)
        stress_pattern = re.compile('xy\n(.*?)(?=\n|$)', re.S)
        for block in block_pattern.findall(lines):
            d = {'outputs': {}}
            size_str = size_pattern.findall(block)[0]
            size = int(size_str.lstrip())
            lattice_str = lattice_pattern.findall(block)[0]
            lattice = Lattice(
                np.array([formatify(row) for row in lattice_str.split('\n')]))
            position_str = position_pattern.findall(block)[0]
            position = np.array(
                [formatify(row) for row in position_str.split('\n')])
            # Column 1 is an index into self.elements. Builtin int replaces
            # the np.int alias, which was removed in NumPy 1.24.
            species = np.array(self.elements)[position[:, 1].astype(int)]
            # Columns 5-7 are the forces, columns 2-4 the coordinates.
            forces = position[:, 5:8].tolist()
            position = position[:, 2:5]
            energy_str = energy_pattern.findall(block)[0]
            energy = float(energy_str.lstrip())
            stress_str = stress_pattern.findall(block)[0]
            # reshape(6, ) doubles as a check that exactly six components
            # were parsed.
            virial_stress = np.array(formatify(stress_str)).reshape(
                6, ).tolist()
            virial_stress = [
                virial_stress[self.mtp_stress_order.index(n)]
                for n in self.vasp_stress_order
            ]
            struct = Structure(lattice=lattice,
                               species=species,
                               coords=position,
                               coords_are_cartesian=True)
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            # The declared size must match the parsed structure.
            assert size == struct.num_sites
            d['num_atoms'] = size
            d['outputs']['forces'] = forces
            d['outputs']['virial_stress'] = virial_stress

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df