コード例 #1
0
ファイル: gaussian.py プロジェクト: davidfarinajr/AutoTST
    def validate_irc(self, calc=None):
        """
        A method to verify an IRC calc

        The log file `<calc.scratch>/<calc.label>.log` is parsed, the geometry
        at the end of the first IRC path and the last geometry in the file are
        converted to RMG molecules, and the reaction they form is compared with
        the reaction encoded in the calculator label. The part of the label in
        front of "_irc" is read as `<reactants>_<products>`, with species
        written as SMILES joined by "+". Every combination of resonance
        structures of the labelled species is tried.

        :param calc: the IRC calculator; `calc.label` must contain "irc" and
            `calc.scratch` is the directory holding the log file
        :return: True when the IRC end points are isomorphic to the labelled
            reaction; False when the log is missing, did not terminate
            normally, holds no usable step information, or does not match
        :raises AssertionError: if `calc.label` does not contain "irc"
        """
        assert "irc" in calc.label, "The calculator provided is not an IRC calculator"

        reaction_label = calc.label.split("_irc")[0]

        logging.info("Validating IRC file...")
        irc_path = os.path.join(calc.scratch, calc.label + ".log")

        if not os.path.exists(irc_path):
            logging.info("It seems that the IRC claculation has not been run.")
            return False

        # Read the log once and close it straight away; both the termination
        # check and the step parsing below work on these lines.
        with open(irc_path, "r") as log_file:
            log_lines = log_file.readlines()

        # Only the tail of the log is inspected for the termination message.
        if not any("Normal termination" in tail_line
                   for tail_line in log_lines[-5:]):
            logging.info("IRC failed... could not be validated...")
            return False
        logging.info("IRC successfully ran")

        pth1 = []   # point numbers (> 0) whose last field on the line is 1
        steps = []  # values reported on the "# OF STEPS =" lines
        for line in log_lines:
            line = line.strip()

            if line.startswith('Point Number:'):
                fields = line.split()
                point_number = int(fields[2])
                if point_number > 0 and int(fields[-1]) == 1:
                    pth1.append(point_number)
            elif line.startswith('# OF STEPS ='):
                steps.append(int(line.split()[-1]))

        if not steps:
            logging.error('No steps taken in the IRC calculation!')
            return False
        if not pth1:
            # Without any path-1 point there is no index to split the
            # geometries on; indexing pth1[-1] would raise IndexError.
            logging.error('No points found on the first IRC path!')
            return False

        # This indexes the coordinate to be used from the parsing
        pth1End = sum(steps[:pth1[-1]])

        # Compare the reactants and products
        # cf.
        # http://cclib.sourceforge.net/wiki/index.php/Using_cclib#Additional_information
        ircParse = ccread(irc_path)
        atomcoords = ircParse.atomcoords
        atomnos = ircParse.atomnos

        # Convert the IRC geometries into RMG molecules
        # We don't know which is reactant or product, so take the two at the end of the
        # paths and compare to the reactants and products
        mol1 = RMGMolecule()
        mol1.fromXYZ(atomnos, atomcoords[pth1End])
        mol2 = RMGMolecule()
        mol2.fromXYZ(atomnos, atomcoords[-1])

        testReaction = RMGReaction(
            reactants=mol1.split(),
            products=mol2.split(),
        )

        r, p = reaction_label.split("_")

        # One list of resonance structures per labelled species.
        reactant_options = [
            RMGMolecule(SMILES=smiles).generate_resonance_structures()
            for smiles in r.split("+")
        ]
        product_options = [
            RMGMolecule(SMILES=smiles).generate_resonance_structures()
            for smiles in p.split("+")
        ]

        possible_reactants = list(itertools.product(*reactant_options))
        possible_products = list(itertools.product(*product_options))

        for possible_reactant in possible_reactants:
            reactant_list = [react.toSingleBonds()
                             for react in possible_reactant]

            for possible_product in possible_products:
                product_list = [prod.toSingleBonds()
                                for prod in possible_product]

                targetReaction = RMGReaction(reactants=list(reactant_list),
                                             products=list(product_list))

                if targetReaction.isIsomorphic(testReaction):
                    logging.info("IRC calculation was successful!")
                    return True

        logging.info("IRC calculation failed for {} :(".format(calc.label))
        return False
コード例 #2
0
ファイル: reaction.py プロジェクト: GalaxyFollower/AutoTST
class TS(Conformer):
    """
    A class that defines the 3D geometry of a transition state (TS)

    The RDKit geometry is built lazily: when `distance_data` is available the
    `rdkit_molecule` property embeds the molecule using a bounds matrix whose
    reaction-centre distances have been overwritten with the values from
    `distance_data`. In that matrix the upper bound of a pair is stored at
    [low index][high index] and the lower bound at [high index][low index]
    (see `set_limits`).
    """
    def __init__(self,
                 smiles=None,
                 reaction_label=None,
                 direction='forward',
                 rmg_molecule=None,
                 reaction_family="H_Abstraction",
                 distance_data=None,
                 index=0):
        """
        :param smiles: SMILES string of the TS complex (optional)
        :param reaction_label: label of the reaction this TS belongs to
        :param direction: "forward" or "reverse" (case-insensitive)
        :param rmg_molecule: RMG molecule of the TS complex (optional); when
            both `smiles` and `rmg_molecule` are given they must be isomorphic
        :param reaction_family: name of the reaction family, used by
            `get_labels` to pick the labelled reaction-centre atoms
        :param distance_data: object whose `distances` mapping provides the
            'd12', 'd13' and 'd23' reaction-centre distances
        :param index: index of this conformer
        :raises AssertionError: on an invalid direction or when `smiles` and
            `rmg_molecule` disagree
        """

        self.energy = None
        self.reaction_label = reaction_label
        self.direction = direction.lower()
        self.reaction_family = reaction_family
        self.distance_data = distance_data
        self.index = index
        self.bm = None

        # Check the normalised value that is actually stored, so that e.g.
        # "Forward" is accepted instead of being lower-cased and then rejected.
        assert self.direction in ["forward",
                                  "reverse"], "Please provide a valid direction"

        self._rdkit_molecule = None
        self._ase_molecule = None

        if (smiles or rmg_molecule):
            if smiles and rmg_molecule:
                assert rmg_molecule.isIsomorphic(
                    RMGMolecule(SMILES=smiles)
                ), "SMILES string did not match RMG Molecule object"
                self.smiles = smiles
                self.rmg_molecule = rmg_molecule

            elif rmg_molecule:
                self.rmg_molecule = rmg_molecule
                self.smiles = rmg_molecule.toSMILES()

            else:
                self.smiles = smiles
                self.rmg_molecule = RMGMolecule(SMILES=smiles)

            self.rmg_molecule.updateMultiplicity()
            self._symmetry_number = None

        else:
            self.smiles = None
            self.rmg_molecule = None
            self.rdkit_molecule = None
            self._pseudo_geometry = None
            self.ase_molecule = None
            self.bonds = []
            self.angles = []
            self.torsions = []
            self.cistrans = []
            self.chiral_centers = []
            self._symmetry_number = None

    def __repr__(self):
        return '<TS "{}">'.format(self.smiles)

    def copy(self):
        """
        Create a new TS carrying copies of this TS's molecules.

        The direction, distance data and index are passed on to the copy so
        that it describes the same transition state as the original.

        :return copy_conf: the new TS object
        """
        copy_conf = TS(reaction_label=self.reaction_label,
                       direction=self.direction,
                       reaction_family=self.reaction_family,
                       distance_data=self.distance_data,
                       index=self.index)
        copy_conf.smiles = self.smiles
        copy_conf.rmg_molecule = self.rmg_molecule.copy()
        copy_conf.rdkit_molecule = self.rdkit_molecule.__copy__()
        copy_conf._pseudo_geometry = self._pseudo_geometry.__copy__()
        copy_conf.ase_molecule = self.ase_molecule.copy()
        copy_conf.get_geometries()
        copy_conf.energy = self.energy
        copy_conf._symmetry_number = self._symmetry_number
        return copy_conf

    @property
    def symmetry_number(self):
        """The symmetry number, calculated on first access and then cached."""

        if not self._symmetry_number:
            self._symmetry_number = self.calculate_symmetry_number()
        return self._symmetry_number

    @property
    def rdkit_molecule(self):
        """
        The RDKit molecule; built with `get_rdkit_mol` on first access, but
        only when distance data is available.
        """
        if (self._rdkit_molecule is None) and self.distance_data:
            self._rdkit_molecule = self.get_rdkit_mol()
        return self._rdkit_molecule

    @rdkit_molecule.setter
    def rdkit_molecule(self, value):
        # The class assigns to this attribute in __init__, copy,
        # get_rdkit_mol and rd_embed; without a setter those assignments
        # raise AttributeError on new-style classes.
        self._rdkit_molecule = value

    @property
    def ase_molecule(self):
        """The ASE molecule; built with `get_ase_mol` on first access."""
        if (self._ase_molecule is None):
            self._ase_molecule = self.get_ase_mol()
        return self._ase_molecule

    @ase_molecule.setter
    def ase_molecule(self, value):
        # Needed for the assignments made in __init__ and copy.
        self._ase_molecule = value

    def get_rdkit_mol(self):
        """
        A method to create an rdkit geometry... slightly different than that of the conformer method

        The molecule is first created through a plain `Conformer`. When three
        reaction-centre labels are found, a copy with one extra single bond
        across the reaction centre is stored as `self._pseudo_geometry`. If
        distance data is available the bounds matrix is built, edited,
        smoothed and used to re-embed the molecule.

        :return rdkit_molecule: the RDKit molecule (the bounds matrix is kept
            on `self.bm`)
        """
        self.rdkit_molecule = Conformer(
            rmg_molecule=self.rmg_molecule).get_rdkit_mol()

        self.get_labels()
        # The RMG and RDKit atom orderings must agree for the labels to be
        # usable as RDKit atom indices.
        for i, atom in enumerate(self.rmg_molecule.atoms):
            assert atom.number == self.rdkit_molecule.GetAtoms(
            )[i].GetAtomicNum()

        if len(self.labels) == 3:

            rd_copy = Chem.RWMol(self.rdkit_molecule.__copy__())

            lbl1, lbl2, lbl3 = self.labels

            # Add whichever reaction-centre bond is missing (at most one).
            if not rd_copy.GetBondBetweenAtoms(lbl1, lbl2):
                rd_copy.AddBond(lbl1,
                                lbl2,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)
            elif not rd_copy.GetBondBetweenAtoms(lbl2, lbl3):
                rd_copy.AddBond(lbl2,
                                lbl3,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)

            self._pseudo_geometry = rd_copy

        logging.info("Initially embedded molecule")

        self.bm = None

        if self.distance_data:
            logging.info("Getting bounds matrix")
            self.bm = self.get_bounds_matrix()

            if len(self.labels) > 0:
                logging.info("Editing bounds matrix")
                self.bm = self.edit_matrix()

            logging.info("Performing triangle smoothing on bounds matrix.")
            DistanceGeometry.DoTriangleSmoothing(self.bm)

            logging.info("Now attempting to embed using edited bounds matrix.")

            self.rd_embed()
        return self.rdkit_molecule

    def get_bounds_matrix(self):
        """
        A method to obtain the bounds matrix

        :return bm: the bounds matrix of `self.rdkit_molecule` (also stored
            on `self.bm`)
        """
        self.bm = rdDistGeom.GetMoleculeBoundsMatrix(self.rdkit_molecule)
        return self.bm

    def set_limits(self, lbl1, lbl2, value, uncertainty):
        """
        A method to set the limits of a particular distance between two atoms

        The upper bound `value + uncertainty / 2` is written at
        [lower index][higher index] of `self.bm` and the lower bound
        `max(0, value - uncertainty / 2)` at [higher index][lower index].

        :param lbl1: the label of one atom
        :param lbl2: the label of another atom
        :param value: the distance from a distance data object (float)
        :param uncertainty: the uncertainty of the `value` distance (float)
        :return bm: an array of arrays corresponding to the edited bounds matrix
        """
        logging.info(
            "For atoms {0} and {1} we have a distance of: \t {2}".format(
                lbl1, lbl2, value))
        if lbl1 > lbl2:
            self.bm[lbl2][lbl1] = value + uncertainty / 2
            self.bm[lbl1][lbl2] = max(0, value - uncertainty / 2)
        else:
            self.bm[lbl2][lbl1] = max(0, value - uncertainty / 2)
            self.bm[lbl1][lbl2] = value + uncertainty / 2

        return self.bm

    def bm_pre_edit(self, sect):
        """
        Clean up some of the atom distance limits before attempting triangle smoothing.
        This ensures any edits made do not lead to unsolvable scenarios for the molecular
        embedding algorithm.

        For every pair (i, j) with j < i, the lower limit `bm[i, j]` is
        lowered to `U(i, k) + U(k, j) - 0.1` whenever it exceeds that value
        for some third atom k, where U is the upper limit of a pair.

        :param sect: the list of atom indices belonging to one species.
            Currently unused: every atom pair is checked. Kept for interface
            compatibility.
        :return bm: the edited bounds matrix
        """
        n_atoms = len(self.bm)

        for i in range(n_atoms):
            for j in range(i):
                for k in range(n_atoms):
                    if k == i or k == j:
                        continue
                    # Upper limits are stored with the lower index first.
                    Uik = self.bm[i, k] if k > i else self.bm[k, i]
                    Ukj = self.bm[j, k] if k > j else self.bm[k, j]

                    maxLij = Uik + Ukj - 0.1
                    if self.bm[i, j] > maxLij:
                        logging.info("Changing lower limit {0} to {1}".format(
                            self.bm[i, j], maxLij))
                        self.bm[i, j] = maxLij

        return self.bm

    def get_labels(self):
        """
        A method to get the labeled atoms from a reaction

        The sorting labels of the reaction-centre atoms of `self.rmg_molecule`
        are collected according to `self.reaction_family`. When the molecule
        has no labelled atoms, or the family is not recognised, both results
        are empty.

        :return labels: the atom labels corresponding to the reaction center
        :return atomMatch: a tuple of tuples the atoms labels corresponding to the reaction center
        """
        labeled_atoms = self.rmg_molecule.getLabeledAtoms()
        family = self.reaction_family.lower()

        if len(labeled_atoms) == 0:
            # Nothing is labelled, so there is no reaction centre to look up.
            label_names = ()
        elif family in [
                'h_abstraction', 'r_addition_multiplebond', 'intra_h_migration'
        ]:
            label_names = ("*1", "*2", "*3")
        elif family in ['disproportionation']:
            label_names = ("*2", "*4", "*1")
        else:
            logging.warning(
                "Reaction family {} is not supported; no atom labels set".format(
                    self.reaction_family))
            label_names = ()

        labels = [labeled_atoms[name].sortingLabel for name in label_names]
        atomMatch = tuple((lbl, ) for lbl in labels)

        #logging.info("The labled atoms are {}.".format(labels))
        self.labels = labels
        self.atom_match = atomMatch
        return self.labels, self.atom_match

    def edit_matrix(self):
        """
        A method to edit the bounds matrix using labels and distance data

        The three reaction-centre distances ('d12', 'd23', 'd13') from
        `self.distance_data.distances` are written into `self.bm` with a
        fixed uncertainty of 0.02 each, then `bm_pre_edit` is applied.

        :return bm: the edited bounds matrix
        """

        lbl1, lbl2, lbl3 = self.labels

        # Atom indices of the first fragment of the TS complex.
        sect = [atom.sortingLabel
                for atom in self.rmg_molecule.split()[0].atoms]

        uncertainties = {'d12': 0.02, 'd13': 0.02, 'd23': 0.02}
        self.bm = self.set_limits(lbl1, lbl2,
                                  self.distance_data.distances['d12'],
                                  uncertainties['d12'])
        self.bm = self.set_limits(lbl2, lbl3,
                                  self.distance_data.distances['d23'],
                                  uncertainties['d23'])
        self.bm = self.set_limits(lbl1, lbl3,
                                  self.distance_data.distances['d13'],
                                  uncertainties['d13'])

        self.bm = self.bm_pre_edit(sect)

        return self.bm

    def optimize_rdkit_molecule(self):
        """
        Optimizes the conformers of `self.rdkit_molecule`.

        Without a bounds matrix or atom match each conformer is optimized
        with UFF and its UFF energy is used; otherwise `EmbedLib.OptimizeMol`
        is called with the bounds matrix and atom match. The id of the
        conformer with the lowest energy is tracked.

        :return rdmol, minEid (index of the lowest energy conformer)
        """

        energy = 0.0
        minEid = 0
        lowestE = 9.999999e99  # start with a very high number, which would never be reached

        for conf in self.rdkit_molecule.GetConformers():
            if (self.bm is None) or (self.atom_match is None):
                AllChem.UFFOptimizeMolecule(self.rdkit_molecule,
                                            confId=conf.GetId())
                energy = AllChem.UFFGetMoleculeForceField(
                    self.rdkit_molecule, confId=conf.GetId()).CalcEnergy()
            else:
                _, energy = EmbedLib.OptimizeMol(self.rdkit_molecule,
                                                 self.bm,
                                                 atomMatches=self.atom_match,
                                                 forceConstant=100000.0)

            if energy < lowestE:
                minEid = conf.GetId()
                lowestE = energy

        return self.rdkit_molecule, minEid

    def rd_embed(self):
        """
        This portion of the script is literally taken from rmgpy but hacked to work without defining a geometry object

        Embed the RDKit molecule and create the crude molecule file.

        :return rdmol, minEid: the embedded molecule and the id of its lowest
            energy conformer, or (None, None) when every embedding attempt
            with the bounds matrix failed
        """
        numConfAttempts = 10000
        if (self.bm is None) or (self.atom_match is None):
            AllChem.EmbedMultipleConfs(self.rdkit_molecule,
                                       numConfAttempts,
                                       randomSeed=1)

            self.rdkit_molecule, minEid = self.optimize_rdkit_molecule()
        else:
            # Embed the molecule according to the bounds matrix. Built to
            # handle possible failures of some of the embedding attempts.
            self.rdkit_molecule.RemoveAllConformers()
            for i in range(0, numConfAttempts):
                try:
                    EmbedLib.EmbedMol(self.rdkit_molecule,
                                      self.bm,
                                      atomMatch=self.atom_match)
                    break
                except ValueError:
                    logging.info(
                        "RDKit failed to embed on attempt {0} of {1}".format(
                            i + 1, numConfAttempts))
                except RuntimeError:
                    logging.info("RDKit failed to embed.")
            else:
                # The loop ran out of attempts without a successful embed.
                logging.error("RDKit failed all attempts to embed")
                return None, None

            # RDKit currently embeds the conformers and sets the id as 0, so
            # even though multiple conformers have been generated, only 1 can
            # be called. Below the id's are resolved.
            for i, conf in enumerate(self.rdkit_molecule.GetConformers()):
                conf.SetId(i)

            self.rdkit_molecule, minEid = self.optimize_rdkit_molecule()

        return self.rdkit_molecule, minEid

    def _get_pseudo_conformer(self):
        """
        Build a plain Conformer that uses the pseudo geometry as its RDKit
        molecule, so the Conformer geometry helpers can be reused.
        """
        test_conf = Conformer()
        test_conf.rmg_molecule = self.rmg_molecule
        test_conf.rdkit_molecule = self._pseudo_geometry
        test_conf.ase_molecule = self.ase_molecule
        return test_conf

    def get_bonds(self):
        """Return the bonds found by `Conformer.get_bonds` on the pseudo geometry."""
        return self._get_pseudo_conformer().get_bonds()

    def get_torsions(self):
        """Return the torsions found by `Conformer.get_torsions` on the pseudo geometry."""
        return self._get_pseudo_conformer().get_torsions()

    def get_angles(self):
        """Return the angles found by `Conformer.get_angles` on the pseudo geometry."""
        return self._get_pseudo_conformer().get_angles()
コード例 #3
0
ファイル: gaussian.py プロジェクト: gitter-badger/AutoTST
    def validate_irc(self):  # TODO: need to add more verification here
        """
        Verify that the finished IRC calculation connects the expected species.

        The log `<irc_calc.scratch>/<irc_calc.label>.log` is read; if it is
        missing, a variant of the path with "left"/"right" replaced by
        "("/")" is tried. The geometry at the end of the first IRC path and
        the last geometry in the file are converted to molecules and the
        reaction they form is compared with `self.reaction.rmg_reaction`
        (with all bonds reduced to single bonds).

        :return: True when the IRC end points are isomorphic to the target
            reaction; False when the log is missing, did not terminate
            normally, holds no usable step information, the target reaction
            holds unsupported species objects, or the reactions do not match
        """
        logging.info("Validating IRC file...")
        irc_path = os.path.join(self.irc_calc.scratch,
                                self.irc_calc.label + ".log")
        if not os.path.exists(irc_path):
            logging.info(
                "It seems that the file was `fixed`, reading in the `fixed` version.")
            irc_path = irc_path.replace("left", "(").replace("right", ")")

            if not os.path.exists(irc_path):
                logging.info(
                    "It seems that the IRC claculation has not been run.")
                return False

        # Read the log once and close it straight away; both the termination
        # check and the step parsing below work on these lines.
        with open(irc_path, "r") as log_file:
            log_lines = log_file.readlines()

        # Only the tail of the log is inspected for the termination message.
        if not any(" Normal termination" in tail_line
                   for tail_line in log_lines[-5:]):
            logging.info("IRC failed... could not be validated...")
            return False
        logging.info("IRC successfully ran")

        pth1 = []   # point numbers (> 0) whose last field on the line is 1
        steps = []  # values reported on the "# OF STEPS =" lines
        for line in log_lines:
            line = line.strip()

            if line.startswith('Point Number:'):
                fields = line.split()
                point_number = int(fields[2])
                if point_number > 0 and int(fields[-1]) == 1:
                    pth1.append(point_number)
            elif line.startswith('# OF STEPS ='):
                steps.append(int(line.split()[-1]))

        if not steps:
            logging.error('No steps taken in the IRC calculation!')
            return False
        if not pth1:
            # Without any path-1 point there is no index to split the
            # geometries on; indexing pth1[-1] would raise IndexError.
            logging.error('No points found on the first IRC path!')
            return False

        # This indexes the coordinate to be used from the parsing
        pth1End = sum(steps[:pth1[-1]])

        # Compare the reactants and products
        # cf. http://cclib.sourceforge.net/wiki/index.php/Using_cclib#Additional_information
        ircParse = ccread(irc_path)
        atomcoords = ircParse.atomcoords
        atomnos = ircParse.atomnos

        # Convert the IRC geometries into RMG molecules
        # We don't know which is reactant or product, so take the two at the end of the
        # paths and compare to the reactants and products
        mol1 = Molecule()
        mol1.fromXYZ(atomnos, atomcoords[pth1End])
        mol2 = Molecule()
        mol2.fromXYZ(atomnos, atomcoords[-1])

        testReaction = Reaction(
            reactants=mol1.split(),
            products=mol2.split(),
        )

        rmg_reaction = self.reaction.rmg_reaction
        first_reactant = rmg_reaction.reactants[0]
        if isinstance(first_reactant, rmgpy.molecule.Molecule):
            targetReaction = Reaction(
                reactants=[reactant.toSingleBonds()
                           for reactant in rmg_reaction.reactants],
                products=[product.toSingleBonds()
                          for product in rmg_reaction.products],
            )
        elif isinstance(first_reactant, rmgpy.species.Species):
            # Species objects: use the first molecule of each species.
            targetReaction = Reaction(
                reactants=[reactant.molecule[0].toSingleBonds()
                           for reactant in rmg_reaction.reactants],
                products=[product.molecule[0].toSingleBonds()
                          for product in rmg_reaction.products],
            )
        else:
            # Previously this case fell through and failed on an unbound
            # `targetReaction`; report it as a failed validation instead.
            logging.error(
                "Cannot validate IRC: unsupported reactant type {}".format(
                    type(first_reactant)))
            return False

        return bool(targetReaction.isIsomorphic(testReaction))
コード例 #4
0
ファイル: gaussian.py プロジェクト: GalaxyFollower/AutoTST
    def validate_irc(self):
        """
        A method to verify an IRC calc

        The log is expected at
        `<directory>/ts/<reaction_label>/irc/<reaction_label>_irc_<direction>_<index>.log`
        and is first checked with `self.verify_output_file`. The geometry at
        the end of the first IRC path and the last geometry in the file are
        converted to RMG molecules, and the reaction they form is compared
        with the reaction encoded in `self.conformer.reaction_label`
        (`<reactants>_<products>`, species written as SMILES joined by "+").
        Every combination of resonance structures is tried.

        :return: True when the IRC end points are isomorphic to the labelled
            reaction; False when the calculation did not complete or
            converge, the log holds no usable step information, or the
            reactions do not match
        """

        logging.info("Validating IRC file...")
        reaction_label = self.conformer.reaction_label
        irc_label = (reaction_label + "_irc_" + self.conformer.direction +
                     "_" + str(self.conformer.index))
        irc_path = os.path.join(self.directory, "ts", reaction_label, "irc",
                                irc_label + ".log")

        complete, converged = self.verify_output_file(irc_path)
        if not complete:
            logging.info("It seems that the IRC claculation did not complete")
            return False
        if not converged:
            logging.info("The IRC calculation did not converge...")
            return False

        pth1 = []   # point numbers (> 0) whose last field on the line is 1
        steps = []  # values reported on the "# OF STEPS =" lines
        with open(irc_path) as outputFile:
            for line in outputFile:
                line = line.strip()

                if line.startswith('Point Number:'):
                    fields = line.split()
                    point_number = int(fields[2])
                    if point_number > 0 and int(fields[-1]) == 1:
                        pth1.append(point_number)
                elif line.startswith('# OF STEPS ='):
                    steps.append(int(line.split()[-1]))

        if not steps:
            logging.error('No steps taken in the IRC calculation!')
            return False
        if not pth1:
            # Without any path-1 point there is no index to split the
            # geometries on; indexing pth1[-1] would raise IndexError.
            logging.error('No points found on the first IRC path!')
            return False

        # This indexes the coordinate to be used from the parsing
        pth1End = sum(steps[:pth1[-1]])

        # Compare the reactants and products
        ircParse = ccread(irc_path)

        atomcoords = ircParse.atomcoords
        atomnos = ircParse.atomnos

        mol1 = RMGMolecule()
        mol1.fromXYZ(atomnos, atomcoords[pth1End])
        mol2 = RMGMolecule()
        mol2.fromXYZ(atomnos, atomcoords[-1])

        testReaction = RMGReaction(
            reactants=mol1.split(),
            products=mol2.split(),
        )

        r, p = reaction_label.split("_")

        # One list of resonance structures per labelled species.
        reactant_options = [
            RMGMolecule(SMILES=smiles).generate_resonance_structures()
            for smiles in r.split("+")
        ]
        product_options = [
            RMGMolecule(SMILES=smiles).generate_resonance_structures()
            for smiles in p.split("+")
        ]

        possible_reactants = list(itertools.product(*reactant_options))
        possible_products = list(itertools.product(*product_options))

        for possible_reactant in possible_reactants:
            reactant_list = [react.toSingleBonds()
                             for react in possible_reactant]

            for possible_product in possible_products:
                product_list = [prod.toSingleBonds()
                                for prod in possible_product]

                targetReaction = RMGReaction(reactants=list(reactant_list),
                                             products=list(product_list))

                if targetReaction.isIsomorphic(testReaction):
                    logging.info("IRC calculation was successful!")
                    return True

        # `calc` does not exist in this method; report the IRC label that the
        # log file name was built from.
        logging.info("IRC calculation failed for {} :(".format(irc_label))
        return False