def uniform_labels(self, mol1, mol2):
    """
    Pair the atoms of two molecules using their InChI labels.

    Args:
        mol1: First molecule; passed to BabelMolAdaptor to obtain an
            OpenBabel molecule.
        mol2: Second molecule; handled the same way as mol1.

    Returns:
        (clabel1, clabel2): the atom labels for mol1 and mol2 in a common
        order, or (None, None) when the two InChI strings differ.

    Raises:
        Exception: if the equivalent-atom information of the two molecules
            differs, or if the elements read through the final labels do
            not agree.
    """
    ob_first = BabelMolAdaptor(mol1).openbabel_mol
    ob_second = BabelMolAdaptor(mol2).openbabel_mol

    labels_a, equiv_a, inchi_a = self._inchi_labels(ob_first)
    labels_b, equiv_b, inchi_b = self._inchi_labels(ob_second)

    # Different InChI strings: the molecules are topologically different.
    if inchi_a != inchi_b:
        return None, None

    if equiv_a != equiv_b:
        raise Exception("Design Error! Equavilent atoms are inconsistent")

    virtual_a = self._virtual_molecule(ob_first, labels_a, equiv_a)
    virtual_b = self._virtual_molecule(ob_second, labels_b, equiv_b)

    # Tiny or linear virtual molecules are delegated to the assistant
    # mapper ("difficult (actually simple) molecules").
    delegate = (
        virtual_a.NumAtoms() < 3
        or self._is_molecule_linear(virtual_a)
        or self._is_molecule_linear(virtual_b)
    )
    if delegate:
        order_a, order_b = self._assistant_mapper.uniform_labels(mol1, mol2)
    else:
        heavy_b = self._align_heavy_atoms(
            ob_first, ob_second, virtual_a, virtual_b,
            labels_a, labels_b, equiv_a)
        order_a, order_b = self._align_hydrogen_atoms(
            ob_first, ob_second, labels_a, heavy_b)

    # Sanity check: both orderings must visit the same elements.
    symbols_a = self._get_elements(ob_first, order_a)
    symbols_b = self._get_elements(ob_second, order_b)
    if symbols_a != symbols_b:
        raise Exception("Design Error! Atomic elements are inconsistent")

    return order_a, order_b
def test_init(self):
    """Build an adaptor from self.mol, then again from its OpenBabel mol."""
    from_pymatgen = BabelMolAdaptor(self.mol)
    ob_molecule = from_pymatgen.openbabel_mol
    self.assertEqual(ob_molecule.NumAtoms(), 5)

    # Round trip: construct a second adaptor directly from the OBMol.
    from_openbabel = BabelMolAdaptor(from_pymatgen.openbabel_mol)
    self.assertEqual(from_openbabel.pymatgen_mol.formula, "H4 C1")
def test_localopt(self):
    """Displace one hydrogen, run localopt, and check the bond lengths."""
    # Replace site 1 with a hydrogen at a shortened distance along z.
    self.mol[1] = "H", [0, 0, 1.05]

    babel = BabelMolAdaptor(self.mol)
    babel.localopt()
    relaxed = babel.pymatgen_mol

    centre = relaxed[0]
    for neighbour in relaxed[1:]:
        self.assertAlmostEqual(neighbour.distance(centre), 1.09216, 2)
def test_get_rmsd(self):
    """Check the formatted RMSD between the t3.xyz and t4.xyz molecules."""
    matcher = MoleculeMatcher()
    first = BabelMolAdaptor.from_file(
        os.path.join(test_dir, "t3.xyz")).pymatgen_mol
    second = BabelMolAdaptor.from_file(
        os.path.join(test_dir, "t4.xyz")).pymatgen_mol

    rmsd_text = '{0:7.3}'.format(matcher.get_rmsd(first, second))
    self.assertEqual(rmsd_text, "0.00488")
def get_molecule_hash(self, mol):
    """
    Return inchi as molecular hash

    Args:
        mol: Molecule passed to BabelMolAdaptor to obtain an OpenBabel
            molecule.

    Returns:
        The third element of the result of self._inchi_labels, which is
        used as the InChI hash.
    """
    ob_molecule = BabelMolAdaptor(mol).openbabel_mol
    _, _, inchi_hash = self._inchi_labels(ob_molecule)[:3]
    return inchi_hash
def test_from_string(self):
    """Parse an XYZ string through BabelMolAdaptor and check the formula."""
    if not babelio.babel_loaded:
        raise SkipTest("OpenBabel not present. Skipping...")

    xyz_text = str(XYZ(self.mol))
    parsed = BabelMolAdaptor.from_string(xyz_text, "xyz").pymatgen_mol
    self.assertEqual(parsed.formula, "H4 C1")
def write_mol(mol, filename):
    """
    Write a molecule to a file based on file extension. For example, anything
    ending in a "xyz" is assumed to be a XYZ file. Supported formats include
    xyz, Gaussian input (gjf|g03|g09|com|inp), and pymatgen's JSON serialized
    molecules. Other recognised extensions (pdb, mol, mdl, sdf, sd, ml2, sy2,
    mol2, cml, mrv) are written through BabelMolAdaptor.

    Args:
        mol (Molecule/IMolecule): Molecule to write
        filename (str): A filename to write to.

    Returns:
        Whatever the selected writer returns.

    Raises:
        ValueError: if the file extension is not recognised.
    """
    fname = os.path.basename(filename)
    lowered = fname.lower()

    if fnmatch(lowered, "*.xyz*"):
        return XYZ(mol).write_file(filename)

    gaussian_exts = ["gjf", "g03", "g09", "com", "inp"]
    if any(fnmatch(lowered, "*.{}*".format(r)) for r in gaussian_exts):
        return GaussianInput(mol).write_file(filename)

    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename, "w") as f:
            return json.dump(mol, f, cls=PMGJSONEncoder)

    # Regex alternation takes the first alternative that matches, so "mol2"
    # must be listed before "mol"; otherwise "x.mol2" is reported as "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor(mol).write_file(filename, m.group(1))

    raise ValueError("Unrecognized file extension!")
def test_from_file(self):
    """Read Ethane_e.pdb through BabelMolAdaptor and check the formula."""
    if not babelio.babel_loaded:
        raise SkipTest("OpenBabel not present. Skipping...")

    pdb_path = os.path.join(test_dir, "Ethane_e.pdb")
    ethane = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
    self.assertEqual(ethane.formula, "H6 C2")
def uniform_labels(self, mol1, mol2):
    """
    Pair the geometrically equivalent atoms of the molecules.
    Calculate RMSD on all possible isomorphism mappings and return mapping
    with the least RMSD

    Args:
        mol1: First molecule. OpenBabel OBMol or pymatgen Molecule object.
        mol2: Second molecule. OpenBabel OBMol or pymatgen Molecule object.

    Returns:
        (list1, list2) if uniform atom order is found. list1 and list2
        are for mol1 and mol2, respectively. Their length equal
        to the number of atoms. They represents the uniform atom order
        of the two molecules. The value of each element is the original
        atom index in mol1 or mol2 of the current atom in uniform atom
        order.
        (None, None) if uniform atom order is not available, i.e. the
        molecule hashes differ or no candidate mapping could be aligned.
    """
    obmol1 = BabelMolAdaptor(mol1).openbabel_mol
    obmol2 = BabelMolAdaptor(mol2).openbabel_mol

    h1 = self.get_molecule_hash(obmol1)
    h2 = self.get_molecule_hash(obmol2)
    if h1 != h2:
        return None, None

    # Enumerate every mapping of mol1 (used as the query) onto mol2.
    query = ob.CompileMoleculeQuery(obmol1)
    isomapper = ob.OBIsomorphismMapper.GetInstance(query)
    isomorph = ob.vvpairUIntUInt()
    isomapper.MapAll(obmol2, isomorph)

    # Order each mapping by its first pair member, then keep the second
    # member shifted by one so it matches the 1-based labels used for mol1.
    label2_list = tuple(
        tuple(pair[1] + 1
              for pair in sorted(mapping, key=lambda morp: morp[0]))
        for mapping in isomorph
    )

    aligner = ob.OBAlign(True, False)
    aligner.SetRefMol(obmol1)
    least_rmsd = float("Inf")
    best_label2 = None
    label1 = list(range(1, obmol1.NumAtoms() + 1))
    # noinspection PyProtectedMember
    elements1 = InchiMolAtomMapper._get_elements(obmol1, label1)
    for label2 in label2_list:
        # noinspection PyProtectedMember
        elements2 = InchiMolAtomMapper._get_elements(obmol2, label2)
        if elements1 != elements2:
            continue
        # Rebuild mol2 with its atoms in the candidate order.
        vmol2 = ob.OBMol()
        for i in label2:
            vmol2.AddAtom(obmol2.GetAtom(i))
        aligner.SetTargetMol(vmol2)
        aligner.Align()
        rmsd = aligner.GetRMSD()
        if rmsd < least_rmsd:
            least_rmsd = rmsd
            best_label2 = label2

    # No mapping survived: honour the documented (None, None) contract
    # instead of returning a half-filled (label1, None) pair.
    if best_label2 is None:
        return None, None
    return label1, best_label2
def test_group_molecules(self):
    """Group the molecules named in mol_list.txt and compare to the fixture."""
    matcher = MoleculeMatcher(tolerance=0.001)

    with open(os.path.join(test_dir, "mol_list.txt")) as listing:
        names = [entry.strip() for entry in listing.readlines()]

    molecules = []
    for name in names:
        path = os.path.join(test_dir, name)
        molecules.append(BabelMolAdaptor.from_file(path).pymatgen_mol)

    groups = matcher.group_molecules(molecules)

    # Translate each grouped molecule back to the file name it came from.
    name_groups = []
    for group in groups:
        name_groups.append(
            [names[molecules.index(member)] for member in group])

    with open(os.path.join(test_dir, "grouped_mol_list.txt")) as expected:
        expected_text = expected.read().strip()

    self.assertEqual(str(name_groups), expected_text)
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log), and
    pymatgen's JSON serialized molecules. Using openbabel, many more
    extensions are supported but requires openbabel to be installed.

    Args:
        filename: A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a JSON file does not deserialize to a Molecule.
        ValueError: if the file extension is not recognised.
    """
    fname = os.path.basename(filename)
    lowered = fname.lower()

    if fnmatch(lowered, "*.xyz*"):
        return XYZ.from_file(filename).molecule

    gaussian_input_exts = ["gjf", "g03", "g09", "com", "inp"]
    if any(fnmatch(lowered, "*.{}*".format(r)) for r in gaussian_input_exts):
        return GaussianInput.from_file(filename).molecule

    gaussian_output_exts = ["out", "lis", "log"]
    if any(fnmatch(lowered, "*.{}*".format(r)) for r in gaussian_output_exts):
        return GaussianOutput(filename).final_structure

    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename) as f:
            s = json.load(f, cls=PMGJSONDecoder)
            # isinstance also accepts Molecule subclasses.
            if not isinstance(s, Molecule):
                raise IOError("File does not contain a valid serialized "
                              "molecule")
            return s

    # Regex alternation takes the first alternative that matches, so "mol2"
    # must be listed before "mol"; otherwise "x.mol2" is reported as "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log), and
    pymatgen's JSON serialized molecules. Using openbabel, many more
    extensions are supported but requires openbabel to be installed.

    Args:
        filename: A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a JSON file does not deserialize to a Molecule.
        ValueError: if the file extension is not recognised.
    """
    fname = os.path.basename(filename)
    lowered = fname.lower()

    if fnmatch(lowered, "*.xyz*"):
        return XYZ.from_file(filename).molecule

    gaussian_input_exts = ["gjf", "g03", "g09", "com", "inp"]
    if any(fnmatch(lowered, "*.{}*".format(r)) for r in gaussian_input_exts):
        return GaussianInput.from_file(filename).molecule

    gaussian_output_exts = ["out", "lis", "log"]
    if any(fnmatch(lowered, "*.{}*".format(r)) for r in gaussian_output_exts):
        return GaussianOutput(filename).final_structure

    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename) as f:
            s = json.load(f, cls=PMGJSONDecoder)
            # isinstance also accepts Molecule subclasses.
            if not isinstance(s, Molecule):
                raise IOError("File does not contain a valid serialized "
                              "molecule")
            return s

    # Regex alternation takes the first alternative that matches, so "mol2"
    # must be listed before "mol"; otherwise "x.mol2" is reported as "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
def _calc_rms(self, mol1, mol2, clabel1, clabel2):
    """
    Calculate the RMSD.

    Args:
        mol1: The first molecule. OpenBabel OBMol or pymatgen Molecule
            object
        mol2: The second molecule. OpenBabel OBMol or pymatgen Molecule
            object
        clabel1: The atom indices that can reorder the first molecule to
            uniform atom order
        clabel2: The atom indices that can reorder the second molecule to
            uniform atom order

    Returns:
        The RMSD.
    """
    def reordered_copy(source, labels):
        # New OBMol carrying only atomic number and position of each atom,
        # visited in the order given by labels.
        rebuilt = ob.OBMol()
        for index in labels:
            original_atom = source.GetAtom(index)
            fresh_atom = rebuilt.NewAtom()
            fresh_atom.SetAtomicNum(original_atom.GetAtomicNum())
            fresh_atom.SetVector(original_atom.GetVector())
        return rebuilt

    obmol1 = BabelMolAdaptor(mol1).openbabel_mol
    obmol2 = BabelMolAdaptor(mol2).openbabel_mol

    reference = reordered_copy(obmol1, clabel1)
    target = reordered_copy(obmol2, clabel2)

    aligner = ob.OBAlign(True, False)
    aligner.SetRefMol(reference)
    aligner.SetTargetMol(target)
    aligner.Align()
    return aligner.GetRMSD()
def test_from_string(self):
    """Parse an XYZ string through BabelMolAdaptor and check the formula."""
    xyz_text = str(XYZ(self.mol))
    parsed = BabelMolAdaptor.from_string(xyz_text, "xyz").pymatgen_mol
    self.assertEqual(parsed.formula, "H4 C1")
def test_from_file(self):
    """Read Ethane_e.pdb through BabelMolAdaptor and check the formula."""
    pdb_path = os.path.join(test_dir, "Ethane_e.pdb")
    ethane = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
    self.assertEqual(ethane.formula, "H6 C2")
def test_fit_with_mapper(self, mapper):
    """
    Exercise MoleculeMatcher.fit with the given mapper.

    Checks a rotated copy of a five-atom CH4 geometry, then a series of
    molecule pairs read from test_dir, and finally one pair with a tight
    tolerance of 0.001.

    Args:
        mapper: Atom mapper handed to MoleculeMatcher.
    """
    def load(name):
        # Read one fixture file from test_dir as a pymatgen molecule.
        return BabelMolAdaptor.from_file(
            os.path.join(test_dir, name)).pymatgen_mol

    species = ["C", "H", "H", "H", "H"]
    coords = [[0.000000, 0.000000, 0.000000],
              [0.000000, 0.000000, 1.089000],
              [1.026719, 0.000000, -0.363000],
              [-0.513360, -0.889165, -0.363000],
              [-0.513360, 0.889165, -0.363000]]
    original = Molecule(species, coords)
    rotation = SymmOp.from_origin_axis_angle([0, 0, 0], [0.1, 0.2, 0.3], 60)
    rotated = Molecule(species, [rotation.operate(c) for c in coords])

    matcher = MoleculeMatcher(mapper=mapper)
    self.assertTrue(matcher.fit(original, rotated))

    # (first file, second file, whether the pair is expected to fit)
    file_pairs = [
        ("benzene1.xyz", "benzene2.xyz", True),
        ("benzene1.xyz", "t2.xyz", False),
        ("c1.xyz", "c2.xyz", True),
        ("t3.xyz", "t4.xyz", True),
        ("j1.xyz", "j2.xyz", True),
        ("ethene1.xyz", "ethene2.xyz", True),
        ("toluene1.xyz", "toluene2.xyz", True),
        ("cyclohexane1.xyz", "cyclohexane2.xyz", True),
        ("oxygen1.xyz", "oxygen2.xyz", True),
    ]
    for first_name, second_name, should_fit in file_pairs:
        first = load(first_name)
        second = load(second_name)
        if should_fit:
            self.assertTrue(matcher.fit(first, second))
        else:
            self.assertFalse(matcher.fit(first, second))

    # The t3/t4 pair fits above but must fail at a tolerance of 0.001.
    strict_matcher = MoleculeMatcher(tolerance=0.001, mapper=mapper)
    first = load("t3.xyz")
    second = load("t4.xyz")
    self.assertFalse(strict_matcher.fit(first, second))
def fit_with_mapper(self, mapper):
    """
    Exercise MoleculeMatcher.fit with the given mapper.

    Checks a rotated copy of a five-atom CH4 geometry, then a series of
    molecule pairs read from test_dir, and finally one pair with a tight
    tolerance of 0.001.

    Args:
        mapper: Atom mapper handed to MoleculeMatcher.
    """
    def load(name):
        # Read one fixture file from test_dir as a pymatgen molecule.
        return BabelMolAdaptor.from_file(
            os.path.join(test_dir, name)).pymatgen_mol

    species = ["C", "H", "H", "H", "H"]
    coords = [[0.000000, 0.000000, 0.000000],
              [0.000000, 0.000000, 1.089000],
              [1.026719, 0.000000, -0.363000],
              [-0.513360, -0.889165, -0.363000],
              [-0.513360, 0.889165, -0.363000]]
    original = Molecule(species, coords)
    rotation = SymmOp.from_origin_axis_angle([0, 0, 0], [0.1, 0.2, 0.3], 60)
    rotated = Molecule(species, [rotation.operate(c) for c in coords])

    matcher = MoleculeMatcher(mapper=mapper)
    self.assertTrue(matcher.fit(original, rotated))

    # (first file, second file, whether the pair is expected to fit)
    file_pairs = [
        ("benzene1.xyz", "benzene2.xyz", True),
        ("benzene1.xyz", "t2.xyz", False),
        ("c1.xyz", "c2.xyz", True),
        ("t3.xyz", "t4.xyz", True),
        ("j1.xyz", "j2.xyz", True),
        ("ethene1.xyz", "ethene2.xyz", True),
        ("toluene1.xyz", "toluene2.xyz", True),
        ("cyclohexane1.xyz", "cyclohexane2.xyz", True),
        ("oxygen1.xyz", "oxygen2.xyz", True),
    ]
    for first_name, second_name, should_fit in file_pairs:
        first = load(first_name)
        second = load(second_name)
        if should_fit:
            self.assertTrue(matcher.fit(first, second))
        else:
            self.assertFalse(matcher.fit(first, second))

    # The t3/t4 pair fits above but must fail at a tolerance of 0.001.
    strict_matcher = MoleculeMatcher(tolerance=0.001, mapper=mapper)
    first = load("t3.xyz")
    second = load("t4.xyz")
    self.assertFalse(strict_matcher.fit(first, second))