Example #1
0
    def uniform_labels(self, mol1, mol2):
        """
        Build a common atom ordering for two molecules from InChI labels.

        Both inputs are wrapped with BabelMolAdaptor to obtain OpenBabel
        molecules, which are then labelled via ``self._inchi_labels``.

        Args:
            mol1: First molecule (anything BabelMolAdaptor accepts).
            mol2: Second molecule (anything BabelMolAdaptor accepts).

        Returns:
            (clabel1, clabel2), the atom labels for mol1 and mol2, or
            (None, None) when the two InChI strings differ.

        Raises:
            Exception: if the equivalent-atom information of the two
                molecules differs, or if the elements selected by the two
                final label lists do not agree.
        """
        ob_first = BabelMolAdaptor(mol1).openbabel_mol
        ob_second = BabelMolAdaptor(mol2).openbabel_mol

        labels_a, equiv_a, inchi_a = self._inchi_labels(ob_first)
        labels_b, equiv_b, inchi_b = self._inchi_labels(ob_second)

        # Different InChI strings: the molecules are topologically different.
        if inchi_a != inchi_b:
            return None, None

        if equiv_a != equiv_b:
            raise Exception("Design Error! Equavilent atoms are inconsistent")

        virtual_a = self._virtual_molecule(ob_first, labels_a, equiv_a)
        virtual_b = self._virtual_molecule(ob_second, labels_b, equiv_b)

        # Very small or linear virtual molecules are delegated to the
        # assistant mapper instead of the alignment-based route.
        delegate = (virtual_a.NumAtoms() < 3
                    or self._is_molecule_linear(virtual_a)
                    or self._is_molecule_linear(virtual_b))
        if not delegate:
            heavy_indices_b = self._align_heavy_atoms(
                ob_first, ob_second, virtual_a, virtual_b,
                labels_a, labels_b, equiv_a)
            clabel1, clabel2 = self._align_hydrogen_atoms(
                ob_first, ob_second, labels_a, heavy_indices_b)
        else:
            clabel1, clabel2 = self._assistant_mapper.uniform_labels(mol1,
                                                                     mol2)

        # Sanity check: both orderings must list the same elements.
        if self._get_elements(ob_first, clabel1) != \
                self._get_elements(ob_second, clabel2):
            raise Exception("Design Error! Atomic elements are inconsistent")

        return clabel1, clabel2
Example #2
0
    def test_init(self):
        """Build an adaptor from self.mol, then from its OpenBabel molecule."""
        first = BabelMolAdaptor(self.mol)
        self.assertEqual(first.openbabel_mol.NumAtoms(), 5)

        # Feed the OpenBabel molecule back into a second adaptor and check
        # the formula reported by its pymatgen molecule.
        second = BabelMolAdaptor(first.openbabel_mol)
        self.assertEqual(second.pymatgen_mol.formula, "H4 C1")
Example #3
0
 def test_localopt(self):
     """Overwrite site 1, run localopt, and check distances to site 0."""
     self.mol[1] = "H", [0, 0, 1.05]
     babel = BabelMolAdaptor(self.mol)
     babel.localopt()
     relaxed = babel.pymatgen_mol
     # Every site after the first must sit ~1.09216 away from site 0
     # (compared to 2 decimal places).
     for atom in relaxed[1:]:
         separation = atom.distance(relaxed[0])
         self.assertAlmostEqual(separation, 1.09216, 2)
 def test_get_rmsd(self):
     """Check the formatted RMSD between t3.xyz and t4.xyz."""
     matcher = MoleculeMatcher()
     path_a = os.path.join(test_dir, "t3.xyz")
     first = BabelMolAdaptor.from_file(path_a).pymatgen_mol
     path_b = os.path.join(test_dir, "t4.xyz")
     second = BabelMolAdaptor.from_file(path_b).pymatgen_mol
     # The RMSD is rendered with the '{0:7.3}' format before comparison.
     rendered = '{0:7.3}'.format(matcher.get_rmsd(first, second))
     self.assertEqual(rendered, "0.00488")
Example #5
0
 def test_localopt(self):
     """Replace site 1 with an H at [0, 0, 1.05] and verify localopt output."""
     self.mol[1] = "H", [0, 0, 1.05]
     wrapper = BabelMolAdaptor(self.mol)
     wrapper.localopt()
     optimized = wrapper.pymatgen_mol
     # All sites but the first are compared against site 0.
     for other in optimized[1:]:
         self.assertAlmostEqual(other.distance(optimized[0]), 1.09216,
                                places=2)
Example #6
0
 def get_molecule_hash(self, mol):
     """
     Return the InChI of the molecule, used as its hash.

     The molecule is wrapped with BabelMolAdaptor and the third item
     returned by ``self._inchi_labels`` is handed back.
     """
     labelling = self._inchi_labels(BabelMolAdaptor(mol).openbabel_mol)
     return labelling[2]
Example #7
0
 def test_from_string(self):
     """Parse the XYZ text of self.mol and check the resulting formula."""
     if not babelio.babel_loaded:
         raise SkipTest("OpenBabel not present. Skipping...")
     xyz_text = str(XYZ(self.mol))
     parsed = BabelMolAdaptor.from_string(xyz_text, "xyz").pymatgen_mol
     self.assertEqual(parsed.formula, "H4 C1")
Example #8
0
def write_mol(mol, filename):
    """
    Write a molecule to a file based on file extension. For example, anything
    ending in a "xyz" is assumed to be a XYZ file. Supported formats include
    xyz, Gaussian input (gjf|g03|g09|com|inp), and pymatgen's JSON serialized
    molecules (json|mson). The extensions
    pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv are handed to BabelMolAdaptor.

    Extension matching is case-insensitive for every format.

    Args:
        mol (Molecule/IMolecule): Molecule to write
        filename (str): A filename to write to.

    Returns:
        Whatever the selected writer returns.

    Raises:
        ValueError: if the extension is not recognized.
    """
    fname = os.path.basename(filename).lower()
    if fnmatch(fname, "*.xyz*"):
        return XYZ(mol).write_file(filename)
    if any(fnmatch(fname, "*.{}*".format(ext))
           for ext in ("gjf", "g03", "g09", "com", "inp")):
        return GaussianInput(mol).write_file(filename)
    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename, "w") as f:
            return json.dump(mol, f, cls=PMGJSONEncoder)

    # "mol2" must precede "mol": regex alternation takes the first
    # alternative that matches, so the old order turned ".mol2" into "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor(mol).write_file(filename, m.group(1))

    raise ValueError("Unrecognized file extension!")
Example #9
0
 def test_from_file(self):
     """Read Ethane_e.pdb with the "pdb" format and check the formula."""
     if not babelio.babel_loaded:
         raise SkipTest("OpenBabel not present. Skipping...")
     pdb_path = os.path.join(test_dir, "Ethane_e.pdb")
     loaded = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
     self.assertEqual(loaded.formula, "H6 C2")
Example #10
0
    def uniform_labels(self, mol1, mol2):
        """
        Pair the geometrically equivalent atoms of the molecules.
        Calculate RMSD on all possible isomorphism mappings and return mapping
        with the least RMSD

        Args:
            mol1: First molecule. OpenBabel OBMol or pymatgen Molecule object.
            mol2: Second molecule. OpenBabel OBMol or pymatgen Molecule object.

        Returns:
            (list1, list2) if uniform atom order is found. list1 and list2
            are for mol1 and mol2, respectively. Their length equal
            to the number of atoms. They represents the uniform atom order
            of the two molecules. The value of each element is the original
            atom index in mol1 or mol2 of the current atom in uniform atom
            order.
            (None, None) if uniform atom order is not available, i.e. the
            molecule hashes differ or no isomorphism mapping with matching
            elements was found.
        """
        obmol1 = BabelMolAdaptor(mol1).openbabel_mol
        obmol2 = BabelMolAdaptor(mol2).openbabel_mol

        h1 = self.get_molecule_hash(obmol1)
        h2 = self.get_molecule_hash(obmol2)
        if h1 != h2:
            return None, None

        query = ob.CompileMoleculeQuery(obmol1)
        isomapper = ob.OBIsomorphismMapper.GetInstance(query)
        isomorph = ob.vvpairUIntUInt()
        isomapper.MapAll(obmol2, isomorph)

        # Order each mapping by its first pair member, then keep the second
        # member shifted by one so it lines up with the 1-based label1 below.
        label2_list = tuple(
            tuple(pair[1] + 1
                  for pair in sorted(mapping, key=lambda morp: morp[0]))
            for mapping in isomorph)

        vmol1 = obmol1
        aligner = ob.OBAlign(True, False)
        aligner.SetRefMol(vmol1)
        least_rmsd = float("Inf")
        best_label2 = None
        label1 = list(range(1, obmol1.NumAtoms() + 1))
        # noinspection PyProtectedMember
        elements1 = InchiMolAtomMapper._get_elements(vmol1, label1)
        for label2 in label2_list:
            # noinspection PyProtectedMember
            elements2 = InchiMolAtomMapper._get_elements(obmol2, label2)
            if elements1 != elements2:
                continue
            # Rebuild mol2 in the candidate order so it can be aligned
            # against the reference molecule.
            vmol2 = ob.OBMol()
            for i in label2:
                vmol2.AddAtom(obmol2.GetAtom(i))
            aligner.SetTargetMol(vmol2)
            aligner.Align()
            rmsd = aligner.GetRMSD()
            if rmsd < least_rmsd:
                least_rmsd = rmsd
                # label2 is an immutable tuple, so no copy is needed.
                best_label2 = label2

        # Honour the documented contract: without a usable mapping there is
        # no uniform order, so neither label list is returned.
        if best_label2 is None:
            return None, None
        return label1, best_label2
Example #11
0
 def test_group_molecules(self):
     """
     Group the molecules named in mol_list.txt and compare the grouping,
     rendered with str(), against the text in grouped_mol_list.txt.
     """
     mm = MoleculeMatcher(tolerance=0.001)
     with open(os.path.join(test_dir, "mol_list.txt")) as listing:
         names = [entry.strip() for entry in listing]
     molecules = []
     for name in names:
         path = os.path.join(test_dir, name)
         molecules.append(BabelMolAdaptor.from_file(path).pymatgen_mol)
     groups = mm.group_molecules(molecules)
     # Translate every grouped molecule back to the file it came from.
     grouped_names = []
     for group in groups:
         grouped_names.append(
             [names[molecules.index(member)] for member in group])
     with open(os.path.join(test_dir, "grouped_mol_list.txt")) as reference:
         expected = reference.read().strip()
     self.assertEqual(str(grouped_names), expected)
Example #12
0
 def test_group_molecules(self):
     """
     Run group_molecules on the files listed in mol_list.txt and check
     the resulting file-name groups against grouped_mol_list.txt.
     """
     matcher = MoleculeMatcher(tolerance=0.001)
     list_path = os.path.join(test_dir, "mol_list.txt")
     with open(list_path) as handle:
         file_names = [raw.strip() for raw in handle.readlines()]
     loaded = [
         BabelMolAdaptor.from_file(os.path.join(test_dir, fn)).pymatgen_mol
         for fn in file_names
     ]
     # Map each molecule in each group back to its source file name.
     name_groups = [
         [file_names[loaded.index(mol)] for mol in grp]
         for grp in matcher.group_molecules(loaded)
     ]
     expected_path = os.path.join(test_dir, "grouped_mol_list.txt")
     with open(expected_path) as handle:
         expected_text = handle.read().strip()
     self.assertEqual(str(name_groups), expected_text)
Example #13
0
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    Gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log) and
    pymatgen's JSON serialized molecules (json|mson). Using openbabel,
    more extensions are supported (pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)
    but this requires openbabel to be installed.

    Extension matching is case-insensitive for every format.

    Args:
        filename:
            A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a JSON file does not deserialize to a Molecule.
        ValueError: if the extension is not recognized.
    """
    fname = os.path.basename(filename).lower()
    if fnmatch(fname, "*.xyz*"):
        return XYZ.from_file(filename).molecule
    if any(fnmatch(fname, "*.{}*".format(ext))
           for ext in ("gjf", "g03", "g09", "com", "inp")):
        return GaussianInput.from_file(filename).molecule
    if any(fnmatch(fname, "*.{}*".format(ext))
           for ext in ("out", "lis", "log")):
        return GaussianOutput(filename).final_structure
    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename) as f:
            s = json.load(f, cls=PMGJSONDecoder)
        if not isinstance(s, Molecule):
            raise IOError("File does not contain a valid serialized "
                          "molecule")
        return s

    # "mol2" must precede "mol": regex alternation takes the first
    # alternative that matches, so the old order turned ".mol2" into "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
Example #14
0
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    Gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log) and
    pymatgen's JSON serialized molecules (json|mson). Using openbabel,
    more extensions are supported (pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)
    but this requires openbabel to be installed.

    Extension matching ignores case for all formats.

    Args:
        filename:
            A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a JSON file does not deserialize to a Molecule.
        ValueError: if the extension is not recognized.
    """
    fname = os.path.basename(filename).lower()

    def has_extension(*extensions):
        # True when the lower-cased base name contains ".<ext>" for any ext.
        return any(fnmatch(fname, "*.{}*".format(ext)) for ext in extensions)

    if has_extension("xyz"):
        return XYZ.from_file(filename).molecule
    if has_extension("gjf", "g03", "g09", "com", "inp"):
        return GaussianInput.from_file(filename).molecule
    if has_extension("out", "lis", "log"):
        return GaussianOutput(filename).final_structure
    if has_extension("json", "mson"):
        with zopen(filename) as f:
            s = json.load(f, cls=PMGJSONDecoder)
        if not isinstance(s, Molecule):
            raise IOError("File does not contain a valid serialized "
                          "molecule")
        return s

    # List "mol2" ahead of "mol" so that ".mol2" is not truncated to "mol"
    # (the regex engine picks the first alternative that matches).
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename,
                                         m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
Example #15
0
    def _calc_rms(self, mol1, mol2, clabel1, clabel2):
        """
        Calculate the RMSD.

        Args:
            mol1:
                The first molecule. OpenBabel OBMol or pymatgen Molecule object
            mol2:
                The second molecule. OpenBabel OBMol or pymatgen Molecule
                object
            clabel1:
                The atom indices that can reorder the first molecule to
                uniform atom order
            clabel2:
                The atom indices that can reorder the second molecule to
                uniform atom order

        Returns:
            The RMSD.
        """
        def reordered(source, labels):
            # Build a fresh OBMol holding only atomic number and position
            # of each labelled atom, in label order.
            target = ob.OBMol()
            for index in labels:
                original_atom = source.GetAtom(index)
                new_atom = target.NewAtom()
                new_atom.SetAtomicNum(original_atom.GetAtomicNum())
                new_atom.SetVector(original_atom.GetVector())
            return target

        source1 = BabelMolAdaptor(mol1).openbabel_mol
        source2 = BabelMolAdaptor(mol2).openbabel_mol

        reference = reordered(source1, clabel1)
        candidate = reordered(source2, clabel2)

        aligner = ob.OBAlign(True, False)
        aligner.SetRefMol(reference)
        aligner.SetTargetMol(candidate)
        aligner.Align()
        return aligner.GetRMSD()
Example #16
0
 def test_from_string(self):
     """Round-trip self.mol through its XYZ string and check the formula."""
     xyz_text = str(XYZ(self.mol))
     parsed = BabelMolAdaptor.from_string(xyz_text, "xyz").pymatgen_mol
     self.assertEqual(parsed.formula, "H4 C1")
Example #17
0
 def test_from_file(self):
     """Load Ethane_e.pdb as "pdb" and check the molecule's formula."""
     pdb_path = os.path.join(test_dir, "Ethane_e.pdb")
     loaded = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
     self.assertEqual(loaded.formula, "H6 C2")
Example #18
0
 def test_from_string(self):
     """Check that the XYZ text of self.mol parses back to formula H4 C1."""
     text = str(XYZ(self.mol))
     babel = BabelMolAdaptor.from_string(text, "xyz")
     formula = babel.pymatgen_mol.formula
     self.assertEqual(formula, "H4 C1")
Example #19
0
 def test_from_file(self):
     """Check that Ethane_e.pdb, read as "pdb", has formula H6 C2."""
     source = os.path.join(test_dir, "Ethane_e.pdb")
     babel = BabelMolAdaptor.from_file(source, "pdb")
     formula = babel.pymatgen_mol.formula
     self.assertEqual(formula, "H6 C2")
Example #20
0
    def test_fit_with_mapper(self, mapper):
        """
        Exercise MoleculeMatcher.fit with the given mapper.

        First a five-atom C/H molecule is compared with a rotated copy of
        itself, then pairs of xyz files from test_dir are compared with the
        default tolerance, and finally t3/t4 are compared with
        tolerance=0.001.
        """
        def load(name):
            # Read one xyz file from test_dir as a pymatgen molecule.
            return BabelMolAdaptor.from_file(
                os.path.join(test_dir, name)).pymatgen_mol

        coords = [[0.000000, 0.000000, 0.000000],
                  [0.000000, 0.000000, 1.089000],
                  [1.026719, 0.000000, -0.363000],
                  [-0.513360, -0.889165, -0.363000],
                  [-0.513360, 0.889165, -0.363000]]
        original = Molecule(["C", "H", "H", "H", "H"], coords)
        rotation = SymmOp.from_origin_axis_angle([0, 0, 0], [0.1, 0.2, 0.3],
                                                 60)
        rotated = Molecule(["C", "H", "H", "H", "H"],
                           [rotation.operate(c) for c in coords])
        matcher = MoleculeMatcher(mapper=mapper)
        self.assertTrue(matcher.fit(original, rotated))

        # (first file, second file, whether fit() is expected to succeed)
        cases = (
            ("benzene1.xyz", "benzene2.xyz", True),
            ("benzene1.xyz", "t2.xyz", False),
            ("c1.xyz", "c2.xyz", True),
            ("t3.xyz", "t4.xyz", True),
            ("j1.xyz", "j2.xyz", True),
            ("ethene1.xyz", "ethene2.xyz", True),
            ("toluene1.xyz", "toluene2.xyz", True),
            ("cyclohexane1.xyz", "cyclohexane2.xyz", True),
            ("oxygen1.xyz", "oxygen2.xyz", True),
        )
        for name_a, name_b, should_fit in cases:
            check = self.assertTrue if should_fit else self.assertFalse
            check(matcher.fit(load(name_a), load(name_b)))

        # With a tolerance of 0.001 the t3/t4 pair must no longer fit.
        strict = MoleculeMatcher(tolerance=0.001, mapper=mapper)
        self.assertFalse(strict.fit(load("t3.xyz"), load("t4.xyz")))
Example #21
0
 def test_get_rmsd(self):
     """Verify the '{0:7.3}'-formatted RMSD of t3.xyz against t4.xyz."""
     mm = MoleculeMatcher()
     reference = BabelMolAdaptor.from_file(
         os.path.join(test_dir, "t3.xyz")).pymatgen_mol
     other = BabelMolAdaptor.from_file(
         os.path.join(test_dir, "t4.xyz")).pymatgen_mol
     rmsd = mm.get_rmsd(reference, other)
     self.assertEqual('{0:7.3}'.format(rmsd), "0.00488")
Example #22
0
    def fit_with_mapper(self, mapper):
        """
        Shared assertions for MoleculeMatcher.fit using the given mapper.

        Checks a five-atom C/H molecule against a rotated copy, a series of
        xyz file pairs from test_dir with the default tolerance, and the
        t3/t4 pair again with tolerance=0.001.
        """
        def from_xyz(name):
            # Read one file from test_dir as a pymatgen molecule.
            path = os.path.join(test_dir, name)
            return BabelMolAdaptor.from_file(path).pymatgen_mol

        coords = [[0.000000, 0.000000, 0.000000],
                  [0.000000, 0.000000, 1.089000],
                  [1.026719, 0.000000, -0.363000],
                  [-0.513360, -0.889165, -0.363000],
                  [-0.513360, 0.889165, -0.363000]]
        mol1 = Molecule(["C", "H", "H", "H", "H"], coords)
        op = SymmOp.from_origin_axis_angle([0, 0, 0], [0.1, 0.2, 0.3], 60)
        mol2 = Molecule(["C", "H", "H", "H", "H"],
                        [op.operate(point) for point in coords])
        mm = MoleculeMatcher(mapper=mapper)
        self.assertTrue(mm.fit(mol1, mol2))

        self.assertTrue(mm.fit(from_xyz("benzene1.xyz"),
                               from_xyz("benzene2.xyz")))
        # The only pair expected not to fit at the default tolerance.
        self.assertFalse(mm.fit(from_xyz("benzene1.xyz"),
                                from_xyz("t2.xyz")))

        # The remaining pairs are all expected to fit.
        for stem_a, stem_b in (("c1", "c2"),
                               ("t3", "t4"),
                               ("j1", "j2"),
                               ("ethene1", "ethene2"),
                               ("toluene1", "toluene2"),
                               ("cyclohexane1", "cyclohexane2"),
                               ("oxygen1", "oxygen2")):
            first = from_xyz(stem_a + ".xyz")
            second = from_xyz(stem_b + ".xyz")
            self.assertTrue(mm.fit(first, second))

        # A tolerance of 0.001 must reject the t3/t4 pair.
        mm = MoleculeMatcher(tolerance=0.001, mapper=mapper)
        self.assertFalse(mm.fit(from_xyz("t3.xyz"), from_xyz("t4.xyz")))