def test_to_coord_string(self, molecule):
    """Check the generated coord string, first without and then with frozen atoms."""
    system = MoleculeSystem(molecule)

    def assert_coord_matches(expected):
        # The generated string must hold exactly two data groups, and its
        # coord group must match the expected text.
        groups = DataGroups(system.to_coord_string())
        assert len(groups.dg_list) == 2
        check_dg(groups.sdg("coord", strict=True), expected)

    # No frozen atoms: plain "x y z symbol" lines.
    expected_free = """
0.00000000000000 0.00000000000000 0.00000000000000 c
0.00000000000000 0.00000000000000 2.86118897312869 o
0.00000000000000 0.00000000000000 -2.86118897312869 o
"""
    assert_coord_matches(expected_free)

    # Freezing atoms 0 and 1 appends an "f" flag to the first two lines.
    system.frozen_indices = {0, 1}
    expected_frozen = """
0.00000000000000 0.00000000000000 0.00000000000000 c f
0.00000000000000 0.00000000000000 2.86118897312869 o f
0.00000000000000 0.00000000000000 -2.86118897312869 o
"""
    assert_coord_matches(expected_frozen)
def test_dummy_atoms(self, molecule_filepath):
    """Check that a trailing dummy atom ("q") is read and written back correctly."""
    system = MoleculeSystem.from_file(molecule_filepath, fmt="coord")
    last_specie = system.molecule[-1].specie

    # Show the parsed species and its class in the test output.
    print(last_specie)
    print(last_specie.__class__)

    # Pymatgen's Specie and DummySpecie have been changed to Species and
    # DummySpecies in v2020.10.9. We keep testing both for backward compatibility.
    assert isinstance(last_specie, (DummySpecies, DummySpecie))
    assert last_specie.symbol == "Q"

    expected_coord = """
0.00000000000000 0.00000000000000 -0.12178983933899 o
1.41713420892173 0.00000000000000 0.96657854674257 h
-1.41713420892173 0.00000000000000 0.96657854674257 h
0.00000000000000 0.00000000000000 0.00000000000000 q
"""
    groups = DataGroups(system.to_coord_string())
    assert len(groups.dg_list) == 2
    check_dg(groups.sdg("coord", strict=True), expected_coord)

    assert_MSONable(system)
def test_from_string(self):
    """
    Exercise MoleculeSystem.from_string on coord-format and xyz-format input.

    Covers: a plain $coord group, a missing $coord group, frozen atoms with
    $intdef internal definitions, $user-defined bonds (valid and malformed),
    and a round trip through the xyz format.
    """
    # basic test
    string = """
$coord
.00000000000000 .00000000000000 .00000000000000 n
-1.15103063747470 -1.99364354517457 .00000000000000 o
2.30206127494940 .00000000000000 .00000000000000 o
-1.15103063747470 1.99364354517457 .00000000000000 o
$end
"""
    ms = MoleculeSystem.from_string(string=string, fmt="coord")
    mol = ms.molecule
    # The coordinate read back differs from the one in the file
    # (-1.151... in the input versus -0.609... on the molecule).
    assert mol[1].coords[0] == pytest.approx(-0.6090991821345737)
    assert len(mol) == 4
    assert len(ms.frozen_indices) == 0
    assert_MSONable(ms)

    # no coord
    with pytest.raises(ValueError, match=r'^The string does not contain \$coord!$'):
        MoleculeSystem.from_string(string="$end", fmt="coord")

    # with frozen and internal definitions
    string = """
$coord
.00000000000000 .00000000000000 .00000000000000 n f
-1.15103063747470 -1.99364354517457 .00000000000000 o
2.30206127494940 .00000000000000 .00000000000000 o
-1.15103063747470 1.99364354517457 .00000000000000 o f
$intdef
# definitions of internal coordinates
1 k 1.0000000000000 stre 1 2 val= 2.43987
2 f 1.0000000000000 bend 1 2 3
-0.5000000000000 bend 2 3 4
$end
"""
    ms = MoleculeSystem.from_string(string=string, fmt="coord")
    mol = ms.molecule
    assert mol[1].coords[0] == pytest.approx(-0.6090991821345737)
    assert len(mol) == 4
    # Atoms 1 and 4 are flagged "f" in the file; indices are 0-based here.
    assert ms.frozen_indices == {0, 3}
    assert len(ms.int_def) == 2
    assert ms.int_def[0].value == pytest.approx(2.43987)
    # The second definition has no "val=" entry; None is checked by identity.
    assert ms.int_def[1].value is None
    assert ms.int_def[0].status == "k"
    assert ms.int_def[1].status == "f"
    assert ms.int_def[0].indices[0] == [0, 1]
    assert len(ms.int_def[1].indices) == 2
    assert ms.int_def[1].weights[1] == pytest.approx(-0.5)

    # Writing the system back must reproduce the coord and intdef groups.
    dg = DataGroups(ms.to_coord_string())
    dg_ref = DataGroups(string)
    assert len(dg.dg_list) == 3
    check_dg(dg.sdg("coord", strict=True), dg_ref.sdg("coord", strict=True))
    check_dg(dg.sdg("intdef", strict=True), dg_ref.sdg("intdef", strict=True))

    # with user-defined bonds
    string = """
$coord
.00000000000000 .00000000000000 .00000000000000 n
-1.15103063747470 -1.99364354517457 .00000000000000 o
2.30206127494940 .00000000000000 .00000000000000 o
-1.15103063747470 1.99364354517457 .00000000000000 o
$user-defined bonds
1-2, 2 - 3,3|4
$end
"""
    ms = MoleculeSystem.from_string(string=string, fmt="coord")
    mol = ms.molecule
    assert mol[1].coords[0] == pytest.approx(-0.6090991821345737)
    assert len(mol) == 4
    # Bonds are stored as (0-based index, separator, 0-based index).
    assert ms.user_defined_bonds == {(0, "-", 1), (1, "-", 2), (2, "|", 3)}

    dg = DataGroups(ms.to_coord_string())
    dg_ref = DataGroups(string)
    assert len(dg.dg_list) == 3
    check_dg(dg.sdg("coord", strict=True), dg_ref.sdg("coord", strict=True))
    check_user_defined_bonds_dg(
        dg.sdg("user-defined bonds", strict=True),
        dg_ref.sdg("user-defined bonds", strict=True))

    # malformed user-defined bonds ("2 3" has no "-" or "|" separator)
    string = """
$coord
.00000000000000 .00000000000000 .00000000000000 n
-1.15103063747470 -1.99364354517457 .00000000000000 o
2.30206127494940 .00000000000000 .00000000000000 o
-1.15103063747470 1.99364354517457 .00000000000000 o
$user-defined bonds
1-2, 2 3,3|4
$end
"""
    with pytest.raises(ValueError, match="Cannot parse user-defined bonds.*"):
        MoleculeSystem.from_string(string=string, fmt="coord")

    # from xyz format: `mol` is still the molecule parsed in the
    # user-defined bonds case above, since the malformed input raised.
    ms = MoleculeSystem.from_string(mol.to(fmt="xyz"), fmt="xyz")
    assert ms.molecule[1].coords[0] == pytest.approx(-0.6090991821345737)
def from_string(cls, string, fmt="coord"):
    """
    Creates an instance from a string. Could be the string of a coord file
    or any format supported by pymatgen Molecule.

    Args:
        string (str): the string with the data.
        fmt (str): the format of the data. could be "coord" for Turbomole
            coord file or any format supported in pymatgen Molecule.

    Returns:
        An instance of MoleculeSystem.

    Raises:
        ValueError: if fmt is "coord" and the string has no $coord data, or
            if a line of $user-defined bonds cannot be parsed (missing
            separator, wrong number of atoms or non-integer indices).
    """
    if fmt != "coord":
        # Any other format is delegated to pymatgen's Molecule parser.
        return cls(Molecule.from_str(string, fmt))

    dg = DataGroups(string=string)
    coordinates_str = dg.sdg("$coord", strict=True)
    if not coordinates_str:
        raise ValueError("The string does not contain $coord!")
    mol, fi = get_mol_and_indices_frozen(coordinates_str)

    int_def = []
    int_def_str = dg.sdg("$intdef", strict=True)
    if int_def_str:
        # remove empty lines and comments
        int_def_str = "\n".join(
            line for line in int_def_str.splitlines()
            if line.strip() and not line.strip().startswith("#")
        )
        # split based on the presence of the index plus the status.
        # In a case like this:
        #  1 k  1.0000000000000 stre    4    1           val=   1.80084
        #  2 k  1.0000000000000 bend    4    3    1      val= 106.27756
        #       1.0000000000000 bend    3    2    1
        #       1.0000000000000 bend    2    4    1
        #  3 f  1.0000000000000 tors    1    2    3    4
        # will split in 3 groups based on the presence of the digit plus k, f, d or i
        # at the beginning of the line.
        r = r"^\s*\d+\s+[kfdi]\s+.*?(?=\s*\d+\s+[kfdi]\s+|\Z)"
        for group in re.findall(r, int_def_str, re.DOTALL | re.MULTILINE):
            int_def.append(InternalDefinition.from_string(group))

    user_def_bonds = set()
    user_def_bonds_str = dg.sdg("$user-defined bonds", strict=True)
    if user_def_bonds_str:
        # parses a line of this form:
        # 1-2, 3-4, 5|6
        # splitting first on "," and then on "-" and "|"
        for line in user_def_bonds_str.splitlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            error_msg = "Cannot parse user-defined bonds for line: {}".format(line)
            for bond in line.split(","):
                # "-" takes precedence over "|" when both appear in a bond.
                separator = next((s for s in ("-", "|") if s in bond), None)
                if separator is None:
                    raise ValueError(error_msg)
                bond_indices = bond.split(separator)
                if len(bond_indices) != 2:
                    raise ValueError(error_msg)
                try:
                    # The file uses 1-based atom indices; store them 0-based.
                    index_1 = int(bond_indices[0]) - 1
                    index_2 = int(bond_indices[1]) - 1
                except ValueError as exc:
                    # Report non-integer indices with the same message as
                    # the other malformed-bond cases instead of the bare
                    # "invalid literal for int()" error.
                    raise ValueError(error_msg) from exc
                user_def_bonds.add((index_1, separator, index_2))

    return cls(mol, int_def=int_def, frozen_indices=fi,
               user_defined_bonds=user_def_bonds)