def parse_multi_xyz(filename):
    """
    Read every molecule stored in a concatenated (multi-frame) XYZ file.

    Each frame starts with a line holding the number of atoms; that line
    plus the following ``num_atoms + 1`` lines are handed to
    ``Molecule.from_str`` with the "xyz" format.

    Note: Parsing stops at the first frame that raises a ValueError (for
    instance a count line that is not an integer). Molecules read before
    that point are still returned and no error is reported.

    TODO: Do some more elegant parsing to ensure that the xyz file is valid

    Args:
        filename (str): The multi-XYZ file to be parsed.

    Returns:
        molecules (list of Molecule objects)
    """
    with open(filename) as handle:
        lines = handle.readlines()

    found = []
    cursor = 0
    total = len(lines)
    while cursor < total:
        try:
            atom_count = int(lines[cursor].strip())
            # A frame is: count line + comment line + one line per atom.
            frame_end = cursor + atom_count + 2
            frame_text = "".join(lines[cursor:frame_end])
            found.append(Molecule.from_str(frame_text, "xyz"))
        except ValueError:
            # Best effort: keep what was parsed so far and stop.
            break
        cursor = frame_end
    return found
def test_main(self):
    """
    Write an ADF input file for the RhB18 molecule and compare its content
    with the stored reference file ``RhB18_adf.inp``.
    """
    molecule = Molecule.from_str(rhb18xyz, "xyz")
    molecule.set_charge_and_spin(-1, 3)

    adf_input = AdfInput(AdfTask("optimize", **rhb18))
    adf_input.write_file(molecule, self.tempfile)

    expected = readfile(join(test_dir, "adf", "RhB18_adf.inp"))
    written = readfile(self.tempfile)
    self.assertEqual(written, expected)
def test_to_from_file_string(self):
    """
    Round-trip ``self.mol`` through each string format and check that
    writing to a file actually creates it.
    """
    for fmt in ("xyz", "json", "g03"):
        serialized = self.mol.to(fmt=fmt)
        self.assertIsNotNone(serialized)

        restored = Molecule.from_str(serialized, fmt=fmt)
        self.assertEqual(restored, self.mol)
        self.assertIsInstance(restored, Molecule)

    # The file is written into the current working directory and removed
    # again once its existence has been confirmed.
    out_path = "CH4_testing.xyz"
    self.mol.to(filename=out_path)
    self.assertTrue(os.path.exists(out_path))
    os.remove(out_path)
def mol_from_string(string, fmt):
    """
    Reads a string into a pymatgen Molecule.

    Delegates to ``Molecule.from_str``. If the conversion raises, an error
    message is emitted through ``printx`` and ``None`` is returned instead
    of propagating the exception.

    Args:
        string: a string containing the molecular data
        fmt: the conversion format to use

    Returns:
        a pymatgen Molecule object, or None if the conversion failed
    """
    try:
        return Molecule.from_str(string, fmt)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed here.
        printx("Error: could not convert string '"+str(fmt)+"' to Molecule.\n"
               +"Default supported formats are xyz, gaussian and pymatgen JSON molecules.\n"
               +"Installing openbabel allows for more extensions.", priority=1)
        return None
def test_get_smiles(self):
    """
    Check ``get_smiles`` against one known SMILES string, then verify for
    every stored molecule that the non-hydrogen species derived from the
    SMILES string match those read from the corresponding ``.mol`` file.
    """

    def non_hydrogen_species(molecule):
        # Sorted element symbols, ignoring hydrogens.
        symbols = [str(e) for e in molecule.species if str(e) != "H"]
        symbols.sort()
        return symbols

    single_molecule = get_smiles(join(files_dir, "molecules"), ["1453094"])
    self.assertEqual(single_molecule[0], 'c12c(cc(c(=O)n1C)C=O)cccc2')

    all_mols = [
        "1453094", "1738108", "1873402", "2045554", "21925165",
        "22125071", "28599994", "31695576", "5078635", "6657763",
    ]
    for mol_id in all_mols:
        smiles = get_smiles(join(files_dir, "molecules"), [mol_id])[0]
        mol_path = join(files_dir, "molecules", mol_id, "{}.mol".format(mol_id))

        from_smiles = Molecule.from_str(smiles, "smi")
        from_file = Molecule.from_file(mol_path)

        self.assertSequenceEqual(
            non_hydrogen_species(from_smiles),
            non_hydrogen_species(from_file),
        )
def test_atom_block_key(self):
    """
    Build an "atoms" block key with one subkey per site of the water
    molecule and compare its string form with ``atoms_string``.
    """
    atoms_block = AdfKey("atoms")
    water = Molecule.from_str(h2oxyz, "xyz")
    for site in water:
        element = str(site.specie)
        position = list(site.coords)
        atoms_block.add_subkey(AdfKey(element, position))
    self.assertEqual(str(atoms_block), atoms_string)
def from_string(cls, string, fmt="coord"):
    """
    Creates an instance from a string. Could be the string of a coord file
    or any format supported by pymatgen Molecule.

    For the "coord" format the ``$coord`` data group is mandatory, while
    ``$intdef`` and ``$user-defined bonds`` are parsed only if present.

    Args:
        string (str): the string with the data.
        fmt (str): the format of the data. could be "coord" for
            Turbomole coord file or any format supported in
            pymatgen Molecule.

    Returns:
        An instance of MoleculeSystem.

    Raises:
        ValueError: if ``$coord`` is missing or a user-defined bond
            cannot be parsed.
    """
    # Every format other than "coord" is handed straight to pymatgen.
    if fmt != "coord":
        return cls(Molecule.from_str(string, fmt))

    data_groups = DataGroups(string=string)

    coord_block = data_groups.sdg("$coord", strict=True)
    if not coord_block:
        raise ValueError("The string does not contain $coord!")
    mol, frozen = get_mol_and_indices_frozen(coord_block)

    int_def = []
    intdef_block = data_groups.sdg("$intdef", strict=True)
    if intdef_block:
        # Drop empty lines and comments; kept lines are left unstripped.
        kept = [
            raw for raw in intdef_block.splitlines()
            if raw.strip() and not raw.strip().startswith("#")
        ]
        cleaned = "\n".join(kept)
        # Split on the presence of the index plus the status at the
        # beginning of a line. For example
        #   1 k  1.0000000000000 stre   4    1           val=   1.80084
        #   2 k  1.0000000000000 bend   4    3    1      val= 106.27756
        #        1.0000000000000 bend   3    2    1
        #        1.0000000000000 bend   2    4    1
        #   3 f  1.0000000000000 tors   1    2    3    4
        # yields 3 groups, each starting with a digit followed by one of
        # k, f, d or i.
        pattern = r"^\s*\d+\s+[kfdi]\s+.*?(?=\s*\d+\s+[kfdi]\s+|\Z)"
        chunks = re.findall(pattern, cleaned, re.DOTALL | re.MULTILINE)
        int_def = [InternalDefinition.from_string(chunk) for chunk in chunks]

    user_def_bonds = set()
    bonds_block = data_groups.sdg("$user-defined bonds", strict=True)
    if bonds_block:
        # Lines look like "1-2, 3-4, 5|6": split on "," first, then on
        # whichever of "-" or "|" is found (checked in that order).
        for raw_line in bonds_block.splitlines():
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            for bond in line.split(","):
                separator = next(
                    (sep for sep in ("-", "|") if sep in bond), None)
                if separator is None:
                    raise ValueError(
                        "Cannot parse user-defined bonds for line: {}".
                        format(line))
                bond_indices = bond.split(separator)
                if len(bond_indices) != 2:
                    raise ValueError(
                        "Cannot parse user-defined bonds for line: {}"
                        .format(line))
                # Indices in the file are 1-based; store them 0-based.
                first = int(bond_indices[0]) - 1
                second = int(bond_indices[1]) - 1
                user_def_bonds.add((first, separator, second))

    return cls(mol, int_def=int_def, frozen_indices=frozen,
               user_defined_bonds=user_def_bonds)