예제 #1
0
def parse_multi_xyz(filename):
    """
    Read every molecule stored in a concatenated (multi-frame) XYZ file.

    Each frame is taken to start with a line holding the atom count and to
    span ``num_atoms + 2`` lines in total. The lines of a frame are joined
    and handed to ``Molecule.from_str`` with the "xyz" format.

    Note: No validation of the file is performed. Parsing stops silently at
    the first frame that raises a ValueError (for example when the line that
    should hold the atom count is not an integer); the molecules read up to
    that point are returned.

    TODO: Do some more elegant parsing to ensure that the xyz file is valid

    Args:
        filename (str): The multi-XYZ file to be parsed.

    Returns:
        molecules (list of Molecule objects)
    """
    with open(filename) as xyz_file:
        lines = xyz_file.readlines()

    molecules = []
    start = 0
    total_lines = len(lines)

    while start < total_lines:
        try:
            # Atom-count line + one further line + one line per atom.
            frame_length = int(lines[start].strip()) + 2
            frame = "".join(lines[start:start + frame_length])
            molecules.append(Molecule.from_str(frame, "xyz"))
        except ValueError:
            # Not a parsable frame: keep what was read so far.
            break
        start += frame_length

    return molecules
예제 #2
0
 def test_main(self):
     """Write an "optimize" ADF input to the temp file and compare it
     with the stored reference input RhB18_adf.inp."""
     molecule = Molecule.from_str(rhb18xyz, "xyz")
     molecule.set_charge_and_spin(-1, 3)
     adf_input = AdfInput(AdfTask("optimize", **rhb18))
     adf_input.write_file(molecule, self.tempfile)
     # Read the reference first, then the freshly written file.
     expected = readfile(join(test_dir, "adf", "RhB18_adf.inp"))
     self.assertEqual(readfile(self.tempfile), expected)
예제 #3
0
파일: test_adf.py 프로젝트: ExpHP/pymatgen
 def test_main(self):
     """Write an "optimize" ADF input to the temp file and compare it
     with the stored reference input RhB18_adf.inp."""
     molecule = Molecule.from_str(rhb18xyz, "xyz")
     molecule.set_charge_and_spin(-1, 3)
     adf_input = AdfInput(AdfTask("optimize", **rhb18))
     adf_input.write_file(molecule, self.tempfile)
     # Read the reference first, then the freshly written file.
     expected = readfile(join(test_dir, "adf", "RhB18_adf.inp"))
     self.assertEqual(readfile(self.tempfile), expected)
예제 #4
0
    def test_to_from_file_string(self):
        """Round-trip self.mol through several string formats, then check
        that writing it to a file creates that file."""
        for fmt in ("xyz", "json", "g03"):
            serialized = self.mol.to(fmt=fmt)
            self.assertIsNotNone(serialized)
            restored = Molecule.from_str(serialized, fmt=fmt)
            self.assertEqual(restored, self.mol)
            self.assertIsInstance(restored, Molecule)

        # File output: the file must exist afterwards and is then cleaned up.
        output_path = "CH4_testing.xyz"
        self.mol.to(filename=output_path)
        self.assertTrue(os.path.exists(output_path))
        os.remove(output_path)
예제 #5
0
    def test_to_from_file_string(self):
        """Round-trip self.mol through several string formats, then check
        that writing it to a file creates that file."""
        for fmt in ("xyz", "json", "g03"):
            serialized = self.mol.to(fmt=fmt)
            self.assertIsNotNone(serialized)
            restored = Molecule.from_str(serialized, fmt=fmt)
            self.assertEqual(restored, self.mol)
            self.assertIsInstance(restored, Molecule)

        # File output: the file must exist afterwards and is then cleaned up.
        output_path = "CH4_testing.xyz"
        self.mol.to(filename=output_path)
        self.assertTrue(os.path.exists(output_path))
        os.remove(output_path)
예제 #6
0
파일: molecule.py 프로젝트: yfyh2013/PyXtal
def mol_from_string(string, fmt):
    """
    Reads a string into a pymatgen Molecule. Uses the pymatgen IMolecule method from_str.

    The conversion is best-effort: if ``Molecule.from_str`` raises, an error
    message is reported through ``printx`` and ``None`` is returned instead
    of propagating the exception.

    Args:
        string: a string containing the molecular data
        fmt: the conversion format to use

    Returns:
        a pymatgen Molecule object, or None if the conversion failed
    """
    try:
        return Molecule.from_str(string, fmt)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed here.
        printx("Error: could not convert string '"+str(fmt)+"' to Molecule.\n"
            +"Default supported formats are xyz, gaussian and pymatgen JSON molecules.\n"
            +"Installing openbabel allows for more extensions.", priority=1)
        return None
예제 #7
0
    def test_get_smiles(self):
        """Check get_smiles against a known SMILES string, then verify for
        every listed molecule that the non-hydrogen species built from the
        SMILES match those read from the corresponding .mol file."""

        def heavy_species(molecule):
            # Sorted species names, with hydrogens left out.
            return sorted(
                str(e) for e in molecule.species if str(e) != "H")

        first = get_smiles(join(files_dir, "molecules"), ["1453094"])
        self.assertEqual(first[0], 'c12c(cc(c(=O)n1C)C=O)cccc2')

        molecule_ids = [
            "1453094", "1738108", "1873402", "2045554", "21925165",
            "22125071", "28599994", "31695576", "5078635", "6657763",
        ]

        for mol_id in molecule_ids:
            smiles = get_smiles(join(files_dir, "molecules"), [mol_id])[0]
            mol_path = join(
                files_dir, "molecules", mol_id, "{}.mol".format(mol_id))

            from_smiles = Molecule.from_str(smiles, "smi")
            from_file = Molecule.from_file(mol_path)

            self.assertSequenceEqual(
                heavy_species(from_smiles), heavy_species(from_file))
예제 #8
0
 def test_atom_block_key(self):
     """Build an "atoms" block key with one subkey per site of the
     h2oxyz molecule and compare its string form with atoms_string."""
     atoms_block = AdfKey("atoms")
     molecule = Molecule.from_str(h2oxyz, "xyz")
     for atom in molecule:
         element = str(atom.specie)
         position = list(atom.coords)
         atoms_block.add_subkey(AdfKey(element, position))
     self.assertEqual(str(atoms_block), atoms_string)
예제 #9
0
파일: test_adf.py 프로젝트: ExpHP/pymatgen
 def test_atom_block_key(self):
     """Build an "atoms" block key with one subkey per site of the
     h2oxyz molecule and compare its string form with atoms_string."""
     atoms_block = AdfKey("atoms")
     molecule = Molecule.from_str(h2oxyz, "xyz")
     for atom in molecule:
         element = str(atom.specie)
         position = list(atom.coords)
         atoms_block.add_subkey(AdfKey(element, position))
     self.assertEqual(str(atoms_block), atoms_string)
예제 #10
0
    def from_string(cls, string, fmt="coord"):
        """
        Build an instance from textual data.

        With ``fmt="coord"`` the string is treated as a Turbomole coord file:
        the ``$coord``, ``$intdef`` and ``$user-defined bonds`` entries are
        read through ``DataGroups``. Any other ``fmt`` is passed straight to
        ``Molecule.from_str``.

        Args:
            string (str): the string with the data.
            fmt (str): the format of the data. could be "coord" for Turbomole
                coord file or any format supported in pymatgen Molecule.

        Returns:
            An instance of MoleculeSystem.

        Raises:
            ValueError: if ``fmt`` is "coord" and no ``$coord`` data is
                found, or if a user-defined bond entry cannot be parsed.
        """

        # Everything that is not a coord file is delegated to pymatgen.
        if fmt != "coord":
            return cls(Molecule.from_str(string, fmt))

        data_groups = DataGroups(string=string)

        coord_text = data_groups.sdg("$coord", strict=True)
        if not coord_text:
            raise ValueError("The string does not contain $coord!")
        mol, frozen = get_mol_and_indices_frozen(coord_text)

        intdef_text = data_groups.sdg("$intdef", strict=True)
        int_def = []
        if intdef_text:
            # Drop empty lines and comment lines (kept lines stay unstripped).
            kept_lines = [
                row for row in intdef_text.splitlines()
                if row.strip() and not row.strip().startswith("#")
            ]
            intdef_text = "\n".join(kept_lines)
            # Each definition starts with an index followed by a status
            # letter (k, f, d or i) and may continue over several lines.
            # For example:
            #    1 k  1.0000000000000 stre    4    1           val=   1.80084
            #    2 k  1.0000000000000 bend    4    3    1      val= 106.27756
            #         1.0000000000000 bend    3    2    1
            #         1.0000000000000 bend    2    4    1
            #    3 f  1.0000000000000 tors    1    2    3    4
            # is split into 3 groups, one per "index + status" header.
            group_pattern = r"^\s*\d+\s+[kfdi]\s+.*?(?=\s*\d+\s+[kfdi]\s+|\Z)"
            groups = re.findall(group_pattern, intdef_text,
                                re.DOTALL | re.MULTILINE)
            int_def = [InternalDefinition.from_string(g) for g in groups]

        bonds_text = data_groups.sdg("$user-defined bonds", strict=True)
        user_def_bonds = set()
        if bonds_text:
            # Lines look like "1-2, 3-4, 5|6": split on "," first, then on
            # the first of "-" / "|" that occurs in each entry.
            for raw_line in bonds_text.splitlines():
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                for bond in line.split(","):
                    separator = next(
                        (sep for sep in ("-", "|") if sep in bond), None)
                    if separator is None:
                        raise ValueError(
                            "Cannot parse user-defined bonds for line: {}".
                            format(line))
                    bond_indices = bond.split(separator)
                    if len(bond_indices) != 2:
                        raise ValueError(
                            "Cannot parse user-defined bonds for line: {}"
                            .format(line))
                    # Indices in the text are shifted down by one.
                    first = int(bond_indices[0]) - 1
                    second = int(bond_indices[1]) - 1
                    user_def_bonds.add((first, separator, second))

        return cls(mol,
                   int_def=int_def,
                   frozen_indices=frozen,
                   user_defined_bonds=user_def_bonds)