Example #1
0
    def test_to_coord_string(self, molecule):
        ms = MoleculeSystem(molecule)
        test_value = """
0.00000000000000 0.00000000000000 0.00000000000000 c
0.00000000000000 0.00000000000000 2.86118897312869 o
0.00000000000000 0.00000000000000 -2.86118897312869 o
"""
        dg = DataGroups(ms.to_coord_string())
        assert len(dg.dg_list) == 2
        check_dg(dg.sdg("coord", strict=True), test_value)

        ms.frozen_indices = {0, 1}
        test_value = """
0.00000000000000 0.00000000000000 0.00000000000000 c f
0.00000000000000 0.00000000000000 2.86118897312869 o f
0.00000000000000 0.00000000000000 -2.86118897312869 o
"""
        dg = DataGroups(ms.to_coord_string())
        assert len(dg.dg_list) == 2
        check_dg(dg.sdg("coord", strict=True), test_value)
Example #2
0
    def test_dummy_atoms(self, molecule_filepath):
        ms = MoleculeSystem.from_file(molecule_filepath, fmt="coord")
        mol = ms.molecule
        print(mol[-1].specie)
        print(mol[-1].specie.__class__)
        # Pymatgen's Specie and DummySpecie have been changed to Species and
        # DummySpecies in v2020.10.9. We keep testing both for backward compatibility.
        assert isinstance(mol[-1].specie, (DummySpecies, DummySpecie))
        assert mol[-1].specie.symbol == "Q"

        test_value2 = """
0.00000000000000 0.00000000000000 -0.12178983933899 o
1.41713420892173 0.00000000000000 0.96657854674257 h
-1.41713420892173 0.00000000000000 0.96657854674257 h
0.00000000000000 0.00000000000000 0.00000000000000 q
"""
        dg = DataGroups(ms.to_coord_string())
        assert len(dg.dg_list) == 2
        check_dg(dg.sdg("coord", strict=True), test_value2)

        assert_MSONable(ms)
Example #3
0
    def test_from_string(self):
        # basic test
        string = """
$coord
 .00000000000000       .00000000000000       .00000000000000      n       
-1.15103063747470     -1.99364354517457       .00000000000000      o       
2.30206127494940       .00000000000000       .00000000000000      o       
-1.15103063747470      1.99364354517457       .00000000000000      o       
$end        
"""

        ms = MoleculeSystem.from_string(string=string, fmt="coord")

        mol = ms.molecule
        assert mol[1].coords[0] == pytest.approx(-0.6090991821345737)
        assert len(mol) == 4

        assert len(ms.frozen_indices) == 0

        assert_MSONable(ms)

        # no coord
        with pytest.raises(ValueError,
                           match=r'^The string does not contain \$coord!$'):
            MoleculeSystem.from_string(string="$end", fmt="coord")

        # with frozen and internal definitions
        string = """
$coord
 .00000000000000       .00000000000000       .00000000000000      n  f
-1.15103063747470     -1.99364354517457       .00000000000000      o       
2.30206127494940       .00000000000000       .00000000000000      o       
-1.15103063747470      1.99364354517457       .00000000000000      o f       
$intdef
# definitions of internal coordinates

1 k  1.0000000000000 stre    1    2           val=   2.43987
2 f  1.0000000000000 bend    1    2  3        
     -0.5000000000000 bend    2    3  4        
$end        
"""
        ms = MoleculeSystem.from_string(string=string, fmt="coord")
        mol = ms.molecule
        assert mol[1].coords[0] == pytest.approx(-0.6090991821345737)
        assert len(mol) == 4

        assert ms.frozen_indices == {0, 3}

        assert len(ms.int_def) == 2
        assert ms.int_def[0].value == pytest.approx(2.43987)
        assert ms.int_def[1].value == None
        assert ms.int_def[0].status == "k"
        assert ms.int_def[1].status == "f"
        assert ms.int_def[0].indices[0] == [0, 1]
        assert len(ms.int_def[1].indices) == 2
        assert ms.int_def[1].weights[1] == pytest.approx(-0.5)

        dg = DataGroups(ms.to_coord_string())
        dg_ref = DataGroups(string)
        assert len(dg.dg_list) == 3
        check_dg(dg.sdg("coord", strict=True), dg_ref.sdg("coord",
                                                          strict=True))
        check_dg(dg.sdg("intdef", strict=True),
                 dg_ref.sdg("intdef", strict=True))

        # with user-defined bonds
        string = """
$coord
 .00000000000000       .00000000000000       .00000000000000      n 
-1.15103063747470     -1.99364354517457       .00000000000000      o       
2.30206127494940       .00000000000000       .00000000000000      o       
-1.15103063747470      1.99364354517457       .00000000000000      o       
$user-defined bonds
1-2, 2 - 3,3|4      
$end        
"""
        ms = MoleculeSystem.from_string(string=string, fmt="coord")
        mol = ms.molecule
        assert mol[1].coords[0] == pytest.approx(-0.6090991821345737)
        assert len(mol) == 4

        assert ms.user_defined_bonds == {(0, "-", 1), (1, "-", 2), (2, "|", 3)}
        dg = DataGroups(ms.to_coord_string())
        dg_ref = DataGroups(string)
        assert len(dg.dg_list) == 3
        check_dg(dg.sdg("coord", strict=True), dg_ref.sdg("coord",
                                                          strict=True))
        check_user_defined_bonds_dg(
            dg.sdg("user-defined bonds", strict=True),
            dg_ref.sdg("user-defined bonds", strict=True))

        # malformed user-defined bonds
        string = """
$coord
 .00000000000000       .00000000000000       .00000000000000      n 
-1.15103063747470     -1.99364354517457       .00000000000000      o       
2.30206127494940       .00000000000000       .00000000000000      o       
-1.15103063747470      1.99364354517457       .00000000000000      o       
$user-defined bonds
1-2, 2 3,3|4      
$end        
"""
        with pytest.raises(ValueError,
                           match="Cannot parse user-defined bonds.*"):
            MoleculeSystem.from_string(string=string, fmt="coord")

        # from xyz format
        ms = MoleculeSystem.from_string(mol.to(fmt="xyz"), fmt="xyz")
        assert ms.molecule[1].coords[0] == pytest.approx(-0.6090991821345737)
Example #4
0
    def from_string(cls, string, fmt="coord"):
        """
        Creates an instance from a string. Could be the string of a coord file
        or any format supported by pymatgen Molecule.

        For the "coord" format the $coord data group is mandatory, while the
        $intdef and $user-defined bonds data groups are parsed only when they
        are found and not empty.

        Args:
            string (str): the string with the data.
            fmt (str): the format of the data. could be "coord" for Turbomole
                coord file or any format supported in pymatgen Molecule.

        Returns:
            An instance of MoleculeSystem.

        Raises:
            ValueError: if fmt is "coord" and the string does not contain a
                $coord data group, or if an entry of the $user-defined bonds
                data group cannot be parsed.
        """

        if fmt != "coord":
            # Every other format is delegated to pymatgen.
            return cls(Molecule.from_str(string, fmt))

        dg = DataGroups(string=string)
        coordinates_str = dg.sdg("$coord", strict=True)
        if not coordinates_str:
            raise ValueError("The string does not contain $coord!")
        mol, fi = get_mol_and_indices_frozen(coordinates_str)

        int_def_str = dg.sdg("$intdef", strict=True)
        int_def = []
        if int_def_str:
            # remove empty lines and comments
            int_def_str = "\n".join(
                line for line in int_def_str.splitlines()
                if line.strip() and not line.strip().startswith("#"))
            # split based on the presence of the index plus the status.
            # In a case like this:
            #    1 k  1.0000000000000 stre    4    1           val=   1.80084
            #    2 k  1.0000000000000 bend    4    3    1      val= 106.27756
            #         1.0000000000000 bend    3    2    1
            #         1.0000000000000 bend    2    4    1
            #    3 f  1.0000000000000 tors    1    2    3    4
            # will split in 3 groups based on the presence of the digit plus k, f, d or i
            # at the beginning of the line.
            r = r"^\s*\d+\s+[kfdi]\s+.*?(?=\s*\d+\s+[kfdi]\s+|\Z)"
            int_def = [
                InternalDefinition.from_string(group)
                for group in re.findall(r, int_def_str,
                                        re.DOTALL | re.MULTILINE)
            ]

        user_def_bonds_str = dg.sdg("$user-defined bonds", strict=True)
        user_def_bonds = set()
        if user_def_bonds_str:
            # parses a line of this form:
            # 1-2, 3-4, 5|6
            # splitting first on "," and then on "-" and "|"
            for raw_line in user_def_bonds_str.splitlines():
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                error_msg = (
                    "Cannot parse user-defined bonds for line: {}".format(
                        line))
                for bond in line.split(","):
                    # "-" takes precedence over "|" if both are present.
                    separator = next(
                        (sep for sep in ("-", "|") if sep in bond), None)
                    bond_indices = bond.split(separator) if separator else []
                    if len(bond_indices) != 2:
                        raise ValueError(error_msg)
                    try:
                        # indices in the file are 1-based, stored 0-based
                        index_1 = int(bond_indices[0]) - 1
                        index_2 = int(bond_indices[1]) - 1
                    except ValueError as err:
                        # Empty or non-numeric indices (e.g. "1-" or "a-b")
                        # are reported with the same message as the other
                        # malformed bonds instead of a bare int() error.
                        raise ValueError(error_msg) from err
                    user_def_bonds.add((index_1, separator, index_2))

        return cls(mol,
                   int_def=int_def,
                   frozen_indices=fi,
                   user_defined_bonds=user_def_bonds)