Ejemplo n.º 1
0
    def evaluate(self, test_structures, ref_energies, ref_forces,
                 ref_stresses):
        """
        Build a reference dataframe from the supplied DFT data and a
        prediction dataframe from the fitted model for the same structures.

        Args:
            test_structures ([Structure]): List of Pymatgen Structure Objects.
            ref_energies ([float]): List of DFT-calculated total energies of
                each structure in structures list.
            ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
                each structure with m atoms in structures list. m can be varied
                with each single structure case.
            ref_stresses (list): List of DFT-calculated (6, ) virial stresses
                of each structure in structures list.

        Returns:
            (df_orig, df_predict): the dataframe converted from the reference
            pool, and the dataframe converted from the structures alone whose
            'y_orig' column holds the model output multiplied by column 'n'.
        """
        # Reference side: structures paired with their DFT quantities.
        reference_pool = pool_from(test_structures, ref_energies, ref_forces,
                                   ref_stresses)
        _, df_orig = convert_docs(reference_pool)

        # Prediction side: same structures, no reference outputs attached.
        bare_pool = pool_from(test_structures)
        _, df_predict = convert_docs(bare_pool)

        # The model output is scaled by 'n' before being stored
        # (presumably a per-atom quantity times the atom count -- confirm
        # against convert_docs).
        predicted = self.model.predict(inputs=test_structures, override=True)
        df_predict['y_orig'] = df_predict['n'] * predicted

        return df_orig, df_predict
Ejemplo n.º 2
0
    def train(self,
              train_structures,
              energies,
              forces,
              stresses=None,
              **kwargs):
        """
        Fit the wrapped model to the training structures.

        Args:
            train_structures ([Structure]): The list of Pymatgen Structure
                objects.
            energies ([float]): List of total energies of each structure in
                structures list.
            forces ([np.array]): List of (m, 3) forces array of each structure
                with m atoms in structures list. m can be varied with each
                single structure case.
            stresses (list): List of (6, ) virial stresses of each
                structure in structures list.
            kwargs: Extra keyword arguments forwarded to ``self.model.fit``.
        """
        pool = pool_from(train_structures, energies, forces, stresses)
        _, frame = convert_docs(pool)

        # Training target is the 'y_orig' column divided by the 'n' column.
        targets = frame['y_orig'] / frame['n']
        self.model.fit(inputs=train_structures, outputs=targets, **kwargs)

        # Remember the element taken from the first symbol of the first
        # training structure.
        first_symbol = train_structures[0].symbol_set[0]
        self.specie = Element(first_symbol)
Ejemplo n.º 3
0
    def evaluate(self,
                 test_structures,
                 ref_energies=None,
                 ref_forces=None,
                 ref_stresses=None,
                 predict_energies=True,
                 predict_forces=True,
                 predict_stress=False):
        """
        Evaluate energies, forces and stresses of structures with trained
        interatomic potentials by running the external ``quip`` executable.

        Args:
            test_structures ([Structure]): List of Pymatgen Structure Objects.
            ref_energies ([float]): List of DFT-calculated total energies of
                each structure in structures list.
            ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
                each structure with m atoms in structures list. m can be varied
                with each single structure case.
            ref_stresses (list): List of DFT-calculated (6, ) virial stresses
                of each structure in structures list.
            predict_energies (bool): Whether to predict energies of configurations.
            predict_forces (bool): Whether to predict forces of configurations.
            predict_stress (bool): Whether to predict virial stress of
                configurations.

        Returns:
            (df_orig, df_predict): dataframes read back from the written
            reference configurations and from the ``quip`` output.

        Raises:
            RuntimeError: If the ``quip`` executable cannot be found.
        """
        if not which('quip'):
            # Adjacent literals are concatenated into ONE message; separating
            # them with commas would make the exception text render as a tuple.
            raise RuntimeError(
                "quip has not been found.\n"
                "Please refer to https://github.com/libAtoms/QUIP for "
                "further detail.")
        xml_file = 'predict.xml'
        original_file = 'original.xyz'
        predict_file = 'predict.xyz'
        predict_pool = pool_from(test_structures, ref_energies, ref_forces,
                                 ref_stresses)

        with ScratchDir('.'):
            self.write_param(xml_file)
            original_file = self.write_cfgs(original_file,
                                            cfg_pool=predict_pool)
            _, df_orig = self.read_cfgs(original_file)

            exe_command = [
                "quip",
                "atoms_filename={}".format(original_file),
                "param_filename={}".format(xml_file),
            ]
            if predict_energies:
                exe_command.append("energy=T")
            if predict_forces:
                exe_command.append("forces=T")
            if predict_stress:
                exe_command.append("virial=T")

            # quip's stdout is the prediction file; the context manager makes
            # sure the handle is flushed and closed before it is read back.
            with open(predict_file, 'w') as predict_handle:
                p = subprocess.Popen(exe_command, stdout=predict_handle)
                p.communicate()

            _, df_predict = self.read_cfgs(predict_file, predict=True)

        return df_orig, df_predict
Ejemplo n.º 4
0
 def test_pool_from(self):
     """pool_from output must match the fixture pool entry by entry."""
     rebuilt_pool = pool_from(self.test_structures, self.test_energies,
                              self.test_forces, self.test_stresses)
     for rebuilt, expected in zip(rebuilt_pool, self.test_pool):
         # Compare the three stored output quantities in a fixed order.
         for quantity in ('energy', 'forces', 'virial_stress'):
             self.assertEqual(rebuilt['outputs'][quantity],
                              expected['outputs'][quantity])
Ejemplo n.º 5
0
    def evaluate(self, test_structures, ref_energies, ref_forces, ref_stresses):
        """
        Evaluate energies, forces and stresses of structures with trained
        interatomic potentials by running ``nnp-predict`` once per structure.

        Args:
            test_structures ([Structure]): List of Pymatgen Structure Objects.
            ref_energies ([float]): List of DFT-calculated total energies of
                each structure in structures list.
            ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
                each structure with m atoms in structures list. m can be varied
                with each single structure case.
            ref_stresses (list): List of DFT-calculated (6, ) virial stresses
                of each structure in structures list.

        Returns:
            (df_orig, df_predict): dataframe read from the written reference
            configurations, and the concatenation of the dataframes read from
            each per-structure prediction output.

        Raises:
            RuntimeError: If ``nnp-predict`` is not found, or if it exits with
                a non-zero return code.
        """
        if not which('nnp-predict'):
            raise RuntimeError("NNP Predictor has not been found.")

        original_file = 'input.data'
        predict_file = 'output.data'

        predict_pool = pool_from(test_structures, ref_energies,
                                 ref_forces, ref_stresses)
        with ScratchDir('.'):
            self.write_param()
            original_file = self.write_cfgs(original_file, cfg_pool=predict_pool)
            _, df_orig = self.read_cfgs(original_file)

            input_filename = self.write_input()

            dfs = []
            for data in predict_pool:
                # Overwrite the input file with a single configuration so the
                # predictor handles one structure per invocation.
                self.write_cfgs(original_file, cfg_pool=[data])
                p = subprocess.Popen(['nnp-predict', input_filename],
                                     stdout=subprocess.PIPE)
                stdout = p.communicate()[0]

                rc = p.returncode
                if rc != 0:
                    error_msg = 'RuNNer exited with return code %d' % rc
                    msg = stdout.decode("utf-8").split('\n')[:-1]
                    # Report everything from the first 'ERROR' line onwards,
                    # else the last line; stay silent if nothing was printed
                    # instead of failing with an unrelated IndexError.
                    error_line = next(
                        (i for i, m in enumerate(msg)
                         if m.startswith('ERROR')), None)
                    if error_line is not None:
                        error_msg += ', '.join(msg[error_line:])
                    elif msg:
                        error_msg += msg[-1]
                    raise RuntimeError(error_msg)

                _, df = self.read_cfgs(predict_file)
                dfs.append(df)
            df_predict = pd.concat(dfs, ignore_index=True)

        return df_orig, df_predict
Ejemplo n.º 6
0
    def train(self, train_structures, energies=None, forces=None, stresses=None,
              **kwargs):
        """
        Train the neural network potential by running the external
        ``nnp-scaling`` and ``nnp-train`` executables in a scratch directory.

        Args:
            train_structures ([Structure]): The list of Pymatgen Structure
                objects.
            energies ([float]): List of total energies of each structure in
                structures list.
            forces ([np.array]): List of (m, 3) forces array of each structure
                with m atoms in structures list. m can be varied with each
                single structure case.
            stresses (list): List of (6, ) virial stresses of each
                structure in structures list.
            kwargs: Parameters in write_input method.

        Returns:
            int: Return code of ``nnp-train`` (0 when this method returns
            normally).

        Raises:
            RuntimeError: If ``nnp-train`` is not found, or if it exits with a
                non-zero return code.
        """
        if not which('nnp-train'):
            raise RuntimeError("NNP Trainer has not been found.")

        train_pool = pool_from(train_structures, energies, forces, stresses)
        atoms_filename = 'input.data'

        with ScratchDir('.'):
            self.write_cfgs(filename=atoms_filename, cfg_pool=train_pool)
            output = 'training_output'

            input_filename = self.write_input(**kwargs)
            p_scaling = subprocess.Popen(['nnp-scaling', input_filename])
            p_scaling.communicate()

            # The training log goes straight to a file; close it before
            # reading it back.
            with open(output, 'w') as log_handle:
                p_train = subprocess.Popen(['nnp-train', input_filename],
                                           stdout=log_handle)
                p_train.communicate()
            rc = p_train.returncode

            with open(output) as f:
                error_lines = f.read()

            if rc != 0:
                # stdout was redirected to the log file, so communicate()
                # returns None for it; the diagnostics come from the log.
                error_msg = 'RuNNer exited with return code %d' % rc
                msg = error_lines.split('\n')[:-1]
                error_line = next(
                    (i for i, m in enumerate(msg) if m.startswith('ERROR')),
                    None)
                if error_line is not None:
                    error_msg += ', '.join(msg[error_line:])
                elif msg:
                    error_msg += msg[-1]
                raise RuntimeError(error_msg)

            # On each ENERGY / FORCES log line: skip one token, then capture
            # the next two (stored as training and validation RMSE below).
            energy_rmse_pattern = re.compile(
                r'ENERGY\s*\S*\s*(\S*)\s*(\S*).*?\n')
            forces_rmse_pattern = re.compile(
                r'FORCES\s*\S*\s*(\S*)\s*(\S*).*?\n')
            # Builtin float replaces the np.float alias removed from NumPy.
            self.train_energy_rmse, self.validation_energy_rmse = \
                np.array(energy_rmse_pattern.findall(error_lines),
                         dtype=float).T
            self.train_forces_rmse, self.validation_forces_rmse = \
                np.array(forces_rmse_pattern.findall(error_lines),
                         dtype=float).T

            # Pick the weights file whose name ends with the configured
            # number of epochs.
            weights_filename_pattern = 'weights*{}.out'.format(
                self.param.get('epochs'))
            weights_filename = glob.glob(weights_filename_pattern)[0]

            self.suffix = weights_filename.split('.')[1]

            self.load_weights(weights_filename)
            self.load_scaler('scaling.data')

        return rc
Ejemplo n.º 7
0
    def evaluate(self,
                 test_structures,
                 ref_energies=None,
                 ref_forces=None,
                 ref_stresses=None,
                 **kwargs):
        """
        Evaluate energies, forces and stresses of structures with trained
        interatomic potentials by running the external ``mlp run`` command.

        Args:
            test_structures ([Structure]): List of Pymatgen Structure Objects.
            ref_energies ([float]): List of DFT-calculated total energies of
                each structure in structures list.
            ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
                each structure with m atoms in structures list. m can be varied
                with each single structure case.
            ref_stresses (list): List of DFT-calculated (6, ) virial stresses
                of each structure in structures list.
            kwargs: Parameters of write_param method.

        Returns:
            (df_orig, df_predict): dataframes read back from the written
            reference configurations and from the ``mlp`` output.

        Raises:
            RuntimeError: If ``mlp`` is not found, or if it exits with a
                non-zero return code.
        """
        if not which('mlp'):
            # Adjacent literals are concatenated into ONE message; separating
            # them with commas would make the exception text render as a tuple.
            raise RuntimeError(
                "mlp has not been found.\n"
                "Please refer to http://gitlab.skoltech.ru/shapeev/mlip "
                "for further detail.")
        fitted_mtp = 'fitted.mtp'
        original_file = 'original.cfgs'
        predict_file = 'predict.cfgs'
        predict_pool = pool_from(test_structures, ref_energies, ref_forces,
                                 ref_stresses)

        # The first symbol of the first structure is used when reading the
        # configuration files back.
        dataset = predict_pool[0]
        if isinstance(dataset['structure'], dict):
            structure = Structure.from_dict(dataset['structure'])
        else:
            structure = dataset['structure']
        symbol = structure.symbol_set[0]
        with ScratchDir('.'):
            self.write_param(fitted_mtp=fitted_mtp,
                             Abinitio=0,
                             Driver=1,
                             Write_cfgs=predict_file,
                             Database_filename=original_file,
                             **kwargs)
            original_file = self.write_cfg(original_file,
                                           cfg_pool=predict_pool)
            _, df_orig = self.read_cfgs(original_file, symbol=symbol)

            # 'mlip.ini' is presumably produced by write_param above --
            # confirm against that method.
            p = subprocess.Popen(
                ['mlp', 'run', 'mlip.ini',
                 '--filename={}'.format(original_file)],
                stdout=subprocess.PIPE)
            stdout = p.communicate()[0]
            rc = p.returncode
            if rc != 0:
                error_msg = 'MLP exited with return code %d' % rc
                msg = stdout.decode("utf-8").split('\n')[:-1]
                # Report everything from the first 'ERROR' line onwards, else
                # the last line; stay silent if nothing was printed instead
                # of failing with an unrelated IndexError.
                error_line = next(
                    (i for i, m in enumerate(msg) if m.startswith('ERROR')),
                    None)
                if error_line is not None:
                    error_msg += ', '.join(msg[error_line:])
                elif msg:
                    error_msg += msg[-1]
                raise RuntimeError(error_msg)
            # Fall back to the '<name>_0' variant when the plain output file
            # was not written.
            if not os.path.exists(predict_file):
                predict_file = '_'.join([predict_file, '0'])
            _, df_predict = self.read_cfgs(predict_file, symbol=symbol)
        return df_orig, df_predict
Ejemplo n.º 8
0
    def train(self,
              train_structures,
              energies=None,
              forces=None,
              stresses=None,
              unfitted_mtp=None,
              max_dist=5,
              radial_basis_size=8,
              max_iter=500,
              energy_weight=1,
              force_weight=1e-2,
              stress_weight=0):
        """
        Train a moment tensor potential by running the external ``mlp train``
        command in a scratch directory.

        Args:
            train_structures ([Structure]): The list of Pymatgen Structure
                objects.
            energies ([float]): List of total energies of each structure in
                structures list.
            forces ([np.array]): List of (m, 3) forces array of each structure
                with m atoms in structures list. m can be varied with each
                single structure case.
            stresses (list): List of (6, ) virial stresses of each
                structure in structures list.
            unfitted_mtp (str): File name of the initial mtp template, looked
                up in the module's ``params`` directory. Required: a
                RuntimeError is raised when it is not given.
            max_dist (float): The actual radial cutoff.
            radial_basis_size (int): Relevant to number of radial basis function.
            max_iter (int): The number of maximum iteration.
            energy_weight (float): The weight of energy.
            force_weight (float): The weight of forces.
            stress_weight (float): The weight of stresses.

        Returns:
            int: Return code of ``mlp train`` (0 when this method returns
            normally).

        Raises:
            RuntimeError: If ``mlp`` is not found, if no ``unfitted_mtp`` is
                given, or if ``mlp train`` exits with a non-zero return code.
        """
        if not which('mlp'):
            # Adjacent literals are concatenated into ONE message; separating
            # them with commas would make the exception text render as a tuple.
            raise RuntimeError(
                "mlp has not been found.\n"
                "Please refer to http://gitlab.skoltech.ru/shapeev/mlip "
                "for further detail.")
        # Validate before creating the scratch directory or writing any file.
        if not unfitted_mtp:
            raise RuntimeError("No specific potentials file provided.")

        train_pool = pool_from(train_structures, energies, forces, stresses)
        atoms_filename = 'train.cfgs'

        with ScratchDir('.'):
            atoms_filename = self.write_cfg(filename=atoms_filename,
                                            cfg_pool=train_pool)

            MTP_file_path = os.path.join(module_dir, 'params', unfitted_mtp)
            shutil.copyfile(MTP_file_path,
                            os.path.join(os.getcwd(), unfitted_mtp))

            # The template carries two %-placeholders, filled with the radial
            # cutoff and the radial basis size.
            with open(unfitted_mtp) as f:
                template = f.read()
            with open(unfitted_mtp, 'w') as f:
                f.write(template % (max_dist, radial_basis_size))

            # '<root>_fitted<ext>'; splitext also copes with names that have
            # no extension.
            root, ext = os.path.splitext(unfitted_mtp)
            save_fitted_mtp = root + '_fitted' + ext

            p = subprocess.Popen([
                'mlp', 'train', unfitted_mtp, atoms_filename,
                '--max-iter={}'.format(max_iter),
                '--trained-pot-name={}'.format(save_fitted_mtp),
                '--curr-pot-name={}'.format(unfitted_mtp),
                '--energy-weight={}'.format(energy_weight),
                '--force-weight={}'.format(force_weight),
                '--stress-weight={}'.format(stress_weight),
                '--init-params=same', '--auto-min-dist'
            ], stdout=subprocess.PIPE)
            stdout = p.communicate()[0]
            rc = p.returncode
            if rc != 0:
                error_msg = 'MLP exited with return code %d' % rc
                msg = stdout.decode("utf-8").split('\n')[:-1]
                # Report everything from the first 'ERROR' line onwards, else
                # the last line; stay silent if nothing was printed instead
                # of failing with an unrelated IndexError.
                error_line = next(
                    (i for i, m in enumerate(msg) if m.startswith('ERROR')),
                    None)
                if error_line is not None:
                    error_msg += ', '.join(msg[error_line:])
                elif msg:
                    error_msg += msg[-1]
                raise RuntimeError(error_msg)

            def load_config(filename):
                """Parse the fitted potential file into an OrderedDict.

                All lines but the last two are kept verbatim under 'safe';
                the last two are read as ``key = {values}`` entries whose
                braces are turned into a JSON list.
                """
                param = OrderedDict()
                with open(filename, 'r') as f:
                    lines = f.readlines()
                param['safe'] = [line.rstrip() for line in lines[:-2]]
                for line in lines[-2:]:
                    fields = line.rstrip().split(' = ')
                    param[fields[0]] = json.loads(
                        fields[1].replace('{', '[').replace('}', ']'))
                return param

            self.param = load_config(save_fitted_mtp)
        return rc
Ejemplo n.º 9
0
    def train(self,
              train_structures,
              energies=None,
              forces=None,
              stresses=None,
              default_sigma=[0.0005, 0.1, 0.05, 0.01],
              use_energies=True,
              use_forces=True,
              use_stress=False,
              **kwargs):
        """
        Train a gaussian process regression potential by running the external
        ``gap_fit`` executable in a scratch directory.

        Args:
            train_structures ([Structure]): The list of Pymatgen Structure
                objects.
            energies ([float]): List of total energies of each structure in
                structures list.
            forces ([np.array]): List of (m, 3) forces array of each structure
                with m atoms in structures list. m can be varied with each
                single structure case.
            stresses (list): List of (6, ) virial stresses of each
                structure in structures list.
            default_sigma (list): Error criteria in energies, forces, stress
                and hessian. Should have 4 numbers. The default list is never
                mutated by this method.
            use_energies (bool): Whether to use dft total energies for training.
                Default to True.
            use_forces (bool): Whether to use dft atomic forces for training.
                Default to True.
            use_stress (bool): Whether to use dft virial stress for training.
                Default to False.

            kwargs:
                Any parameter left out (or passed as None) falls back to the
                value in ``soap_params``; explicit falsy values such as 0 or
                0.0 are honoured.

                l_max (int): Parameter to configure GAP. The band limit of
                    spherical harmonics basis function. Default to 12.
                n_max (int): Parameter to configure GAP. The number of radial basis
                    function. Default to 10.
                atom_sigma (float): Parameter to configure GAP. The width of gaussian
                    atomic density. Default to 0.5.
                zeta (float): Present when covariance function type is dot product.
                    Default to 4.
                cutoff (float): Parameter to configure GAP. The cutoff radius.
                    Default to 4.0.
                cutoff_transition_width (float): Parameter to configure GAP.
                    The transition width of cutoff radial. Default to 0.5.
                delta (float): Parameter to configure Sparsification.
                    The signal variance of noise. Default to 1.
                f0 (float): Parameter to configure Sparsification.
                    The signal mean of noise. Default to 0.0.
                n_sparse (int): Parameter to configure Sparsification.
                    Number of sparse points.
                covariance_type (str): Parameter to configure Sparsification.
                    The type of covariance function. Default to dot_product.
                sparse_method (str): Method to perform clustering in sparsification.
                    Default to 'cur_points'.

                sparse_jitter (float): Intrinsic error of atomic/bond energy,
                    used to regularise the sparse covariance matrix.
                    Default to 1e-8.
                e0 (float): Atomic energy value to be subtracted from energies
                    before fitting. Default to 0.0.
                e0_offset (float): Offset of baseline. If zero, the offset is
                    the average atomic energy of the input data or the e0
                    specified manually. Default to 0.0.

        Returns:
            int: Return code of ``gap_fit`` (0 when this method returns
            normally).

        Raises:
            RuntimeError: If ``gap_fit`` is not found, or if it exits with a
                non-zero return code.
            ValueError: If ``default_sigma`` does not hold exactly 4 numbers.
        """
        if not which('gap_fit'):
            # Adjacent literals are concatenated into ONE message; separating
            # them with commas would make the exception text render as a tuple.
            raise RuntimeError(
                "gap_fit has not been found.\n"
                "Please refer to https://github.com/libAtoms/QUIP for "
                "further detail.")
        atoms_filename = 'train.xyz'
        xml_filename = 'train.xml'
        train_pool = pool_from(train_structures, energies, forces, stresses)

        exe_command = ["gap_fit"]
        exe_command.append('at_file={}'.format(atoms_filename))
        gap_configure_params = [
            'l_max', 'n_max', 'atom_sigma', 'zeta', 'cutoff',
            'cutoff_transition_width', 'delta', 'f0', 'n_sparse',
            'covariance_type', 'sparse_method'
        ]
        preprocess_params = ['sparse_jitter', 'e0', 'e0_offset']
        if len(default_sigma) != 4:
            raise ValueError(
                "The default sigma is supposed to have 4 numbers.")

        def resolve(param_name):
            """Return the caller's value, or the soap_params default if unset."""
            # Compare against None rather than truthiness so an explicit
            # 0 / 0.0 override is not replaced by the default.
            value = kwargs.get(param_name)
            if value is None:
                return soap_params.get(param_name)
            return value

        gap_command = ['soap']
        for param_name in gap_configure_params:
            gap_command.append(
                param_name + '=' + '{}'.format(resolve(param_name)))
        exe_command.append("gap=" + "{" + "{}".format(' '.join(gap_command)) +
                           "}")

        for param_name in preprocess_params:
            exe_command.append(
                param_name + '=' + '{}'.format(resolve(param_name)))

        sigma_fields = [str(f) for f in default_sigma]
        exe_command.append("default_sigma={%s}" % (' '.join(sigma_fields)))

        if use_energies:
            exe_command.append('energy_parameter_name=dft_energy')
        if use_forces:
            exe_command.append('force_parameter_name=dft_force')
        if use_stress:
            exe_command.append('virial_parameter_name=dft_virial')
        exe_command.append('gp_file={}'.format(xml_filename))

        with ScratchDir('.'):
            self.write_cfgs(filename=atoms_filename, cfg_pool=train_pool)

            p = subprocess.Popen(exe_command, stdout=subprocess.PIPE)
            stdout = p.communicate()[0]
            rc = p.returncode
            if rc != 0:
                error_msg = 'QUIP exited with return code %d' % rc
                msg = stdout.decode("utf-8").split('\n')[:-1]
                # Report everything from the first 'ERROR' line onwards, else
                # the last line; stay silent if nothing was printed instead
                # of failing with an unrelated IndexError.
                error_line = next(
                    (i for i, m in enumerate(msg) if m.startswith('ERROR')),
                    None)
                if error_line is not None:
                    error_msg += ', '.join(msg[error_line:])
                elif msg:
                    error_msg += msg[-1]
                raise RuntimeError(error_msg)

            def get_xml(xml_file):
                """Parse the fitted XML and load the sparse-point file it names."""
                tree = ET.parse(xml_file)
                root = tree.getroot()
                potential_label = root.tag
                gpcoordinates = list(root.iter('gpCoordinates'))[0]
                param_file = gpcoordinates.get('sparseX_filename')
                param = np.loadtxt(param_file)
                return tree, param, potential_label

            tree, param, potential_label = get_xml(xml_filename)
            self.param['xml'] = tree
            self.param['param'] = param
            self.param['potential_label'] = potential_label

        return rc