class Conformer():
    """
    A class for generating and editing 3D conformers of molecules.

    A conformer carries three representations of one molecule: an RMG
    molecule (`rmg_molecule`), an RDKit molecule (`rdkit_molecule`) and an ASE
    atoms object (`ase_molecule`). On top of those it stores lists of geometry
    objects (`bonds`, `angles`, `torsions`, `cistrans`, `chiral_centers`) that
    the `set_*` methods use to edit the ASE coordinates.

    :param smiles: optional SMILES string of the molecule
    :param rmg_molecule: optional RMG molecule object
    :param index: an identifier stored on the conformer (default 0)

    If neither `smiles` nor `rmg_molecule` is given, an empty conformer is
    created and no geometry is generated.
    """

    def __init__(self, smiles=None, rmg_molecule=None, index=0):
        self.energy = None
        self.index = index
        self._symmetry_number = None  # filled lazily by `symmetry_number`

        if smiles or rmg_molecule:
            if smiles and rmg_molecule:
                assert rmg_molecule.isIsomorphic(RMGMolecule(
                    SMILES=smiles)), "SMILES string did not match RMG Molecule object"
                self.smiles = smiles
                self.rmg_molecule = rmg_molecule
            elif rmg_molecule:
                self.rmg_molecule = rmg_molecule
                self.smiles = rmg_molecule.toSMILES()
            else:
                self.smiles = smiles
                self.rmg_molecule = RMGMolecule(SMILES=smiles)

            self.rmg_molecule.updateMultiplicity()
            self.get_molecules()
            # NOTE: get_molecules() already calls get_geometries(); the call
            # is kept so the construction sequence is unchanged.
            self.get_geometries()
        else:
            self.smiles = None
            self.rmg_molecule = None
            self.rdkit_molecule = None
            self.ase_molecule = None
            self.bonds = []
            self.angles = []
            self.torsions = []
            self.cistrans = []
            self.chiral_centers = []

    def __repr__(self):
        return '<Conformer "{}">'.format(self.smiles)

    def copy(self):
        """
        Return a new Conformer built from copies of the RMG, RDKit and ASE
        molecules of this one. The geometry lists are regenerated on the copy
        and `energy` is carried over.
        """
        copy_conf = Conformer()
        copy_conf.smiles = self.smiles
        copy_conf.rmg_molecule = self.rmg_molecule.copy()
        copy_conf.rdkit_molecule = self.rdkit_molecule.__copy__()
        copy_conf.ase_molecule = self.ase_molecule.copy()
        copy_conf.get_geometries()
        copy_conf.energy = self.energy
        return copy_conf

    @property
    def symmetry_number(self):
        """
        The symmetry number of the conformer, computed on first access with
        `calculate_symmetry_number` and cached afterwards.
        """
        if not self._symmetry_number:
            self._symmetry_number = self.calculate_symmetry_number()
        return self._symmetry_number

    def _find_by_index(self, items, index):
        """
        Return the first object in `items` whose `.index` equals `index`, or
        None when there is no such object.
        """
        for item in items:
            if item.index == index:
                return item
        return None

    def _read_mol_block_atoms(self):
        """
        Parse the atom lines of the mol block written for `rdkit_molecule`.

        :return: a list with one `(symbol, coords)` tuple per RDKit atom, in
            atom order, where `coords` is a numpy array `[x, y, z]`
        """
        mol_list = AllChem.MolToMolBlock(self.rdkit_molecule).split('\n')
        number_of_atoms = self.rdkit_molecule.GetNumAtoms()

        parsed = []
        # The atom lines start at the fifth line of the mol block (index 4),
        # one line per atom: "x y z symbol ...".
        for line in mol_list[4:4 + number_of_atoms]:
            fields = line.split()
            coords = np.array([float(value) for value in fields[:3]])
            parsed.append((fields[3], coords))
        return parsed

    def get_rdkit_mol(self):
        """
        A method for creating an rdkit geometry from an rmg mol.

        Embeds the RDKit molecule, stores it on `rdkit_molecule` and copies the
        embedded coordinates onto the atoms of `rmg_molecule`.

        :return: the embedded RDKit molecule
        """
        assert self.rmg_molecule, "Cannot create an RDKit geometry without an RMG molecule object"

        RDMol = self.rmg_molecule.toRDKitMol(removeHs=False)
        rdkit.Chem.AllChem.EmbedMolecule(RDMol)
        self.rdkit_molecule = RDMol

        for atom, (_, coords) in zip(self.rmg_molecule.atoms,
                                     self._read_mol_block_atoms()):
            atom.coords = coords

        return self.rdkit_molecule

    def get_ase_mol(self):
        """
        A method for creating an ase atoms object from an rdkit mol.

        The RDKit molecule is generated first if it does not exist yet.

        :return: the ASE atoms object, also stored on `ase_molecule`
        """
        if not self.rdkit_molecule:
            self.get_rdkit_mol()

        # Only the atom lines of the mol block are read, so bond and footer
        # lines never have to be filtered out by catching exceptions.
        ase_atoms = [
            Atom(symbol=symbol, position=(coords[0], coords[1], coords[2]))
            for symbol, coords in self._read_mol_block_atoms()
        ]

        self.ase_molecule = Atoms(ase_atoms)
        return self.ase_molecule

    def get_molecules(self):
        """
        Build the RDKit and ASE molecules (and the RMG molecule from `smiles`
        when it is missing), then regenerate the geometry lists.

        :return: the tuple `(rdkit_molecule, ase_molecule)`
        """
        if not self.rmg_molecule:
            self.rmg_molecule = RMGMolecule(SMILES=self.smiles)
        self.rdkit_molecule = self.get_rdkit_mol()
        self.ase_molecule = self.get_ase_mol()
        self.get_geometries()

        return self.rdkit_molecule, self.ase_molecule

    def view(self):
        """
        A method designed to create a 3D figure of the AutoTST_Molecule with
        py3Dmol from the rdkit_molecule.
        """
        mb = Chem.MolToMolBlock(self.rdkit_molecule)
        p = py3Dmol.view(width=600, height=600)
        p.addModel(mb, "sdf")
        p.setStyle({'stick': {}})
        p.setBackgroundColor('0xeeeeee')
        p.zoomTo()
        return p.show()

    def get_bonds(self):
        """
        A method for identifying all of the bonds in a conformer.

        One `Bond` is created per RDKit bond, with its length read from the
        ASE molecule. A bond is flagged as `reaction_center` when both of its
        RMG atoms carry a label.

        :return: the list of `Bond` objects, also stored on `bonds`
        """
        bonds = []
        for index, rd_bond in enumerate(self.rdkit_molecule.GetBonds()):
            i, j = rd_bond.GetBeginAtomIdx(), rd_bond.GetEndAtomIdx()
            length = self.ase_molecule.get_distance(i, j)
            center = bool(self.rmg_molecule.atoms[i].label
                          and self.rmg_molecule.atoms[j].label)

            bond = Bond(index=index,
                        atom_indices=(i, j),
                        length=length,
                        reaction_center=center)
            bond.mask = self.get_mask(bond)
            bonds.append(bond)

        self.bonds = bonds
        return self.bonds

    def get_angles(self):
        """
        A method for identifying all of the angles in a conformer.

        Every path atom1-atom2-atom3 over bonded neighbours is recorded once
        (a path and its reverse count as the same angle).

        :return: the list of `Angle` objects, also stored on `angles`
        """
        angle_list = []
        seen = set()  # same content as angle_list, for O(1) membership tests
        for atom1 in self.rdkit_molecule.GetAtoms():
            for atom2 in atom1.GetNeighbors():
                for atom3 in atom2.GetNeighbors():
                    if atom1.GetIdx() == atom3.GetIdx():
                        continue
                    to_add = (atom1.GetIdx(), atom2.GetIdx(), atom3.GetIdx())
                    if to_add in seen or to_add[::-1] in seen:
                        continue
                    seen.add(to_add)
                    angle_list.append(to_add)

        angles = []
        for index, indices in enumerate(angle_list):
            i, j, k = indices
            degree = self.ase_molecule.get_angle(i, j, k)
            # A mask-less Angle is needed first because get_mask() works on
            # geometry objects.
            ang = Angle(index=index,
                        atom_indices=indices,
                        degree=degree,
                        mask=[])
            mask = self.get_mask(ang)
            angles.append(Angle(index=index,
                                atom_indices=indices,
                                degree=degree,
                                mask=mask,
                                reaction_center=False))

        self.angles = angles
        return self.angles

    def _central_bond_dihedrals(self, bond_type):
        """
        Collect one four-atom tuple per non-ring bond of the requested type.

        :param bond_type: text that must appear in the string form of the
            RDKit bond type of the central bond ("SINGLE" or "DOUBLE")
        :return: a list of `(atom0, atom1, atom2, atom3)` index tuples, where
            atom1-atom2 is the central bond, atom0 is bonded to atom1 and
            atom3 is bonded to atom2

        Each bond is visited exactly once, so no two tuples share a central
        bond and no de-duplication is needed.
        """
        found = []
        for bond1 in self.rdkit_molecule.GetBonds():
            atom1 = bond1.GetBeginAtom()
            atom2 = bond1.GetEndAtom()
            if atom1.IsInRing() or atom2.IsInRing():
                # The central bond must not touch a ring atom
                continue
            if bond_type not in str(bond1.GetBondType()):
                continue

            # The outer atoms are the last neighbour found on each side that
            # is not the other atom of the central bond. Terminal hydrogens
            # are allowed on purpose.
            # NOTE: for H_abstraction TSs, a non terminal H should exist
            atom0 = None
            for bond0 in atom1.GetBonds():
                atomX = bond0.GetOtherAtom(atom1)
                if atomX.GetIdx() != atom2.GetIdx():
                    atom0 = atomX

            atom3 = None
            for bond2 in atom2.GetBonds():
                atomY = bond2.GetOtherAtom(atom2)
                if atomY.GetIdx() != atom1.GetIdx():
                    atom3 = atomY

            if atom0 is None or atom3 is None:
                # One end of the central bond has no other neighbour
                continue

            found.append((atom0.GetIdx(), atom1.GetIdx(),
                          atom2.GetIdx(), atom3.GetIdx()))
        return found

    def get_torsions(self):
        """
        A method for identifying all of the torsions in a conformer.

        A torsion is created for every non-ring single bond that has at least
        one other neighbour on each side.

        :return: the list of `Torsion` objects, also stored on `torsions`
        """
        torsions = []
        for index, indices in enumerate(self._central_bond_dihedrals("SINGLE")):
            i, j, k, l = indices
            dihedral = self.ase_molecule.get_dihedral(i, j, k, l)
            tor = Torsion(index=index,
                          atom_indices=indices,
                          dihedral=dihedral,
                          mask=[])
            mask = self.get_mask(tor)
            torsions.append(Torsion(index=index,
                                    atom_indices=indices,
                                    dihedral=dihedral,
                                    mask=mask,
                                    reaction_center=False))

        self.torsions = torsions
        return self.torsions

    def _other_substituent(self, j, k, l, indices):
        """
        Pick a replacement for the terminal atom `l` of a cis/trans dihedral.

        Walks the bonds of atom `k` up to the first one that is neither the
        j-k nor the k-l bond, then returns the first atom of the bond it
        stopped on that is not in `indices`. Returns `l` unchanged when there
        is no such atom.
        """
        atom = self.rdkit_molecule.GetAtomWithIdx(k)
        for bond in atom.GetBonds():
            indexes = [bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()]
            if not ((sorted([j, k]) == sorted(indexes)) or (
                    sorted([k, l]) == sorted(indexes))):
                break

        for index in indexes:
            if index not in indices:
                return index
        return l

    def get_cistrans(self):
        """
        A method for identifying all possible cistrans bonds in a molecule.

        A `CisTrans` is created for every non-ring double bond that has at
        least one other neighbour on each side. Bond directions are set on
        the two outer bonds of `rdkit_molecule` and stereochemistry is
        reassigned to label each double bond "Z" or "E".

        :return: the list of `CisTrans` objects, also stored on `cistrans`
        """
        cistrans = []
        for ct_index, indices in enumerate(self._central_bond_dihedrals("DOUBLE")):
            i, j, k, l = indices

            b0 = self.rdkit_molecule.GetBondBetweenAtoms(i, j)
            b1 = self.rdkit_molecule.GetBondBetweenAtoms(j, k)
            b2 = self.rdkit_molecule.GetBondBetweenAtoms(k, l)

            b0.SetBondDir(Chem.BondDir.ENDUPRIGHT)
            b2.SetBondDir(Chem.BondDir.ENDDOWNRIGHT)

            Chem.AssignStereochemistry(self.rdkit_molecule, force=True)

            if "STEREOZ" in str(b1.GetStereo()):
                stero = "Z"
                swap_at = 0
            else:
                stero = "E"
                swap_at = 180

            # NOTE(review): a dihedral that rounds to 360 does not match the
            # 0 test below - confirm whether that case needs handling.
            if round(self.ase_molecule.get_dihedral(i, j, k, l), -1) == swap_at:
                l = self._other_substituent(j, k, l, indices)
            indices = [i, j, k, l]

            dihedral = self.ase_molecule.get_dihedral(i, j, k, l)
            tor = CisTrans(index=ct_index,
                           atom_indices=indices,
                           dihedral=dihedral,
                           mask=[],
                           stero=stero)
            mask = self.get_mask(tor)
            cistrans.append(CisTrans(index=ct_index,
                                     atom_indices=indices,
                                     dihedral=dihedral,
                                     mask=mask,
                                     stero=stero))

        self.cistrans = cistrans
        return self.cistrans

    def get_mask(self, geometry):
        """
        Getting the right hand mask for a geometry object:

        - self: an AutoTST Conformer object
        - geometry: a Bond, Angle, Torsion, or CisTrans object

        :return: a list with one boolean per atom of `ase_molecule`; True for
            the right-hand seed atoms of the geometry and every atom that can
            be reached from them without passing through a left-hand atom
        :raises TypeError: if `geometry` is none of the supported types
        """
        if isinstance(geometry, (autotst.geometry.Torsion,
                                 autotst.geometry.CisTrans)):
            L1, L0, R0, R1 = geometry.atom_indices
            LHS_atoms_index = [L0, L1]
            RHS_atoms_index = [R0, R1]
        elif isinstance(geometry, autotst.geometry.Angle):
            a1, a2, a3 = geometry.atom_indices
            # The central atom a2 is listed on both sides
            LHS_atoms_index = [a2, a1]
            RHS_atoms_index = [a2, a3]
        elif isinstance(geometry, autotst.geometry.Bond):
            a1, a2 = geometry.atom_indices
            LHS_atoms_index = [a1]
            RHS_atoms_index = [a2]
        else:
            raise TypeError(
                "Cannot build a mask for {!r}".format(geometry))

        # Breadth-first walk over the right-hand side; RHS_atoms_index grows
        # while it is being traversed.
        assigned = set(LHS_atoms_index) | set(RHS_atoms_index)
        position = 0
        while position < len(RHS_atoms_index):
            RHS_atom = self.rdkit_molecule.GetAtomWithIdx(
                RHS_atoms_index[position])
            for neighbor in RHS_atom.GetNeighbors():
                neighbor_index = neighbor.GetIdx()
                if neighbor_index not in assigned:
                    assigned.add(neighbor_index)
                    RHS_atoms_index.append(neighbor_index)
            position += 1

        right_hand_side = set(RHS_atoms_index)
        return [index in right_hand_side
                for index in range(len(self.ase_molecule))]

    def get_chiral_centers(self):
        """
        A method to identify the chiral centers of the RDKit molecule,
        including unassigned ones.

        :return: the list of `ChiralCenter` objects, also stored on
            `chiral_centers`
        """
        centers = rdkit.Chem.FindMolChiralCenters(
            self.rdkit_molecule, includeUnassigned=True)

        self.chiral_centers = [
            ChiralCenter(index=index,
                         atom_index=atom_index,
                         chirality=chirality)
            for index, (atom_index, chirality) in enumerate(centers)
        ]
        return self.chiral_centers

    def get_geometries(self):
        """
        A helper method to obtain all geometry things.

        :return: the tuple `(bonds, angles, torsions, cistrans,
            chiral_centers)`
        """
        self.bonds = self.get_bonds()
        self.angles = self.get_angles()
        self.torsions = self.get_torsions()
        self.cistrans = self.get_cistrans()
        self.chiral_centers = self.get_chiral_centers()

        return (
            self.bonds,
            self.angles,
            self.torsions,
            self.cistrans,
            self.chiral_centers)

    def update_coords(self):
        """
        A function that creates distance matrices for the RMG, ASE, and RDKit
        molecules and finds which (if any) are different. If one is different,
        this will update the coordinates of the other two with the different
        one. If all three are different, nothing will happen. If all are the
        same, nothing will happen.

        :return: `(False, None)` when all three differ, `(True, name)` when
            the molecule called `name` ("rmg", "ase" or "rdkit") was used to
            update the other two, `(True, None)` when all three agree
        """
        rdkit_dm = rdkit.Chem.rdmolops.Get3DDistanceMatrix(self.rdkit_molecule)
        ase_dm = self.ase_molecule.get_all_distances()

        rmg_coords = np.array([atom.coords for atom in self.rmg_molecule.atoms])
        rmg_dm = np.linalg.norm(
            rmg_coords[:, np.newaxis, :] - rmg_coords[np.newaxis, :, :],
            axis=-1)

        # Largest pairwise disagreement, rounded to 3 decimals
        d1 = round(abs(rdkit_dm - ase_dm).max(), 3)  # rdkit vs ase
        d2 = round(abs(rdkit_dm - rmg_dm).max(), 3)  # rdkit vs rmg
        d3 = round(abs(ase_dm - rmg_dm).max(), 3)    # ase vs rmg

        differences = np.array([d1, d2, d3])
        if np.all(differences > 0):
            return False, None
        if not np.any(differences > 0):
            return True, None

        if d1 == 0:
            diff = "rmg"      # rdkit and ase agree, so rmg is the odd one
        elif d2 == 0:
            diff = "ase"      # rdkit and rmg agree
        else:
            diff = "rdkit"    # ase and rmg agree
        self.update_coords_from(diff)
        return True, diff

    def update_coords_from(self, mol_type="ase"):
        """
        A method to update the coordinates of the RMG, RDKit, and ASE objects
        with a chosen object.

        :param mol_type: which molecule holds the coordinates to copy from;
            one of "ase", "rmg" or "rdkit" (case-insensitive)
        """
        possible_mol_types = ["ase", "rmg", "rdkit"]
        source = mol_type.lower()
        assert (source in possible_mol_types), "Please specifiy a valid mol type. Valid types are {}".format(
            possible_mol_types)

        if source == "rmg":
            conf = self.rdkit_molecule.GetConformers()[0]
            ase_atoms = []
            for i, atom in enumerate(self.rmg_molecule.atoms):
                x, y, z = atom.coords
                conf.SetAtomPosition(i, [x, y, z])
                ase_atoms.append(Atom(symbol=atom.symbol, position=(x, y, z)))
            self.ase_molecule = Atoms(ase_atoms)

        elif source == "ase":
            conf = self.rdkit_molecule.GetConformers()[0]
            for i, position in enumerate(self.ase_molecule.get_positions()):
                self.rmg_molecule.atoms[i].coords = position
                conf.SetAtomPosition(i, position)

        elif source == "rdkit":
            for atom, (_, coords) in zip(self.rmg_molecule.atoms,
                                         self._read_mol_block_atoms()):
                atom.coords = coords
            self.get_ase_mol()

    def set_bond_length(self, bond_index, length):
        """
        This is a method to set bond lengths.

        Variables:
        - bond_index (int): the index of the bond you want to edit
        - length (float, int): the distance you want to set the bond (in angstroms)

        :return: the conformer itself; nothing is changed when no bond has
            the given index
        """
        assert isinstance(length, (float, int))

        bond = self._find_by_index(self.bonds, bond_index)
        if bond is None:
            logging.info("Bond index provided is out of range. Nothing was changed.")
            return self

        i, j = bond.atom_indices
        self.ase_molecule.set_distance(
            a0=i,
            a1=j,
            distance=length,
            mask=bond.mask,
            fix=0
        )

        bond.length = length
        self.update_coords_from(mol_type="ase")
        return self

    def set_angle(self, angle_index, angle):
        """
        A method that will set the angle of an Angle object accordingly.

        :param angle_index: the index of the Angle object to edit
        :param angle: the new value of the angle (float or int)
        :return: the conformer itself; nothing is changed when no angle has
            the given index
        """
        assert isinstance(
            angle, (int, float)), "Plese provide a float or an int for the angle"

        a = self._find_by_index(self.angles, angle_index)
        if a is None:
            logging.info("Angle index provided is out of range. Nothing was changed.")
            return self

        i, j, k = a.atom_indices
        self.ase_molecule.set_angle(
            a1=i,
            a2=j,
            a3=k,
            angle=angle,
            mask=a.mask
        )

        a.degree = angle
        self.update_coords_from(mol_type="ase")
        return self

    def set_torsion(self, torsion_index, dihedral):
        """
        A method that will set the dihedral angle of a Torsion object
        accordingly.

        :param torsion_index: the index of the Torsion object to edit
        :param dihedral: the new dihedral angle (float or int)
        :return: the conformer itself; nothing is changed when no torsion has
            the given index
        """
        assert isinstance(
            dihedral, (int, float)), "Plese provide a float or an int for the diehdral angle"

        torsion = self._find_by_index(self.torsions, torsion_index)
        if torsion is None:
            logging.info("Torsion index provided is out of range. Nothing was changed.")
            return self

        i, j, k, l = torsion.atom_indices
        self.ase_molecule.set_dihedral(
            a1=i,
            a2=j,
            a3=k,
            a4=l,
            angle=dihedral,
            mask=torsion.mask
        )

        torsion.dihedral = dihedral
        self.update_coords_from(mol_type="ase")
        return self

    def set_cistrans(self, cistrans_index, stero="E"):
        """
        A method that will set a corresponding cistrans bond to the proper
        E/Z config.

        :param cistrans_index: the index of the CisTrans object to edit
        :param stero: "E" or "Z" (case-insensitive)
        :return: the conformer itself; nothing is changed when no cistrans
            bond has the given index
        """
        assert stero.upper() in [
            "E", "Z"], "Please specify a valid stero direction."

        cistrans = self._find_by_index(self.cistrans, cistrans_index)
        if cistrans is None:
            logging.info("CisTrans index provided is out of range. Nothing was changed.")
            return self

        if cistrans.stero == stero.upper():
            # Already in the requested configuration; only resync coordinates
            self.update_coords_from("ase")
            return self

        cistrans.stero = stero.upper()
        i, j, k, l = cistrans.atom_indices
        # Flip the configuration by rotating the masked side by 180 degrees
        self.ase_molecule.rotate_dihedral(
            a1=i,
            a2=j,
            a3=k,
            a4=l,
            angle=float(180),
            mask=cistrans.mask
        )
        self.update_coords_from(mol_type="ase")
        return self

    def set_chirality(self, chiral_center_index, stero="R"):
        """
        A method that can set the orientation of a chiral center.

        The chiral tag is set on a copy of the RDKit molecule, which is then
        re-embedded and becomes the new `rdkit_molecule`. Afterwards every
        stored torsion and cistrans dihedral is reapplied to the ASE molecule.

        :param chiral_center_index: the index (int) of the ChiralCenter object
        :param stero: "R" or "S" (case-insensitive)
        :return: the conformer itself; nothing is changed when no chiral
            center has the given index
        """
        assert stero.upper() in ["R", "S"], "Specify a valid stero orientation"
        assert isinstance(chiral_center_index, int), "Please provide an integer for the index"

        # Look the center up before touching the molecule, so an unknown
        # index is a clean no-op.
        chiral_center = self._find_by_index(
            self.chiral_centers, chiral_center_index)
        if chiral_center is None:
            logging.info("ChiralCenter index provided is out of range. Nothing was changed")
            return self

        centers_dict = {
            'R': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
            'S': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW
        }

        rdmol = self.rdkit_molecule.__copy__()
        rdmol.GetAtomWithIdx(chiral_center.atom_index).SetChiralTag(
            centers_dict[stero.upper()])

        rdkit.Chem.rdDistGeom.EmbedMolecule(rdmol)

        old_torsions = self.torsions[:] + self.cistrans[:]

        self.rdkit_molecule = rdmol
        self.update_coords_from(mol_type="rdkit")

        # Now resetting dihedral angles in case if they changed.
        for torsion in old_torsions:
            i, j, k, l = torsion.atom_indices
            self.ase_molecule.set_dihedral(
                a1=i,
                a2=j,
                a3=k,
                a4=l,
                mask=torsion.mask,
                angle=torsion.dihedral,
            )

        self.update_coords_from(mol_type="ase")
        return self

    def calculate_symmetry_number(self):
        """
        Compute the symmetry number of the ASE geometry with RMG's
        `PointGroupCalculator`.

        :return: the symmetry number of the calculated point group, or 1 when
            no point group is returned
        """
        from rmgpy.qm.symmetry import PointGroupCalculator
        from rmgpy.qm.qmdata import QMData

        atom_numbers = self.ase_molecule.get_atomic_numbers()
        coordinates = self.ase_molecule.get_positions()

        qmdata = QMData(
            groundStateDegeneracy=1,  # Only needed to check if valid QMData
            numberOfAtoms=len(atom_numbers),
            atomicNumbers=atom_numbers,
            atomCoords=(coordinates, str('angstrom')),
            energy=(0.0, str('kcal/mol'))  # Only needed to avoid error
        )
        # Creates an instance of an anonymous class holding the two settings
        settings = type(str(''), (), dict(symmetryPath=str(
            'symmetry'), scratchDirectory="."))()
        pgc = PointGroupCalculator(settings, self.smiles, qmdata)
        pg = pgc.calculate()

        if pg is not None:
            return pg.symmetryNumber
        return 1
class TS(Conformer):
    """
    A class that defines the 3D geometry of a transition state (TS).

    :param smiles: optional SMILES string of the TS
    :param reaction_label: label of the reaction (required)
    :param rmg_molecule: optional RMG molecule object of the TS
    :param reaction_family: name of the reaction family; selects which atom
        labels make up the reaction center (see `get_labels`)
    :param distance_data: optional object with a `distances` mapping holding
        the keys 'd12', 'd23' and 'd13'; when given, the embedding is
        constrained with an edited bounds matrix
    :param index: an identifier stored on the TS (default 0)
    """

    def __init__(self, smiles=None, reaction_label=None, rmg_molecule=None,
                 reaction_family="H_Abstraction", distance_data=None, index=0):

        self.energy = None

        assert reaction_label, "A reaction label needs to be provided in addition to a smiles or rmg_molecule"

        self.reaction_label = reaction_label
        self._rdkit_molecule = None
        self._ase_molecule = None
        # Always defined so that copy() and the geometry methods can test it
        self._pseudo_geometry = None
        # Read by the inherited `symmetry_number` property
        self._symmetry_number = None
        self.reaction_family = reaction_family
        self.distance_data = distance_data
        self.index = index

        if smiles or rmg_molecule:
            if smiles and rmg_molecule:
                assert rmg_molecule.isIsomorphic(
                    RMGMolecule(SMILES=smiles)
                ), "SMILES string did not match RMG Molecule object"
                self.smiles = smiles
                self.rmg_molecule = rmg_molecule
            elif rmg_molecule:
                self.rmg_molecule = rmg_molecule
                self.smiles = rmg_molecule.toSMILES()
            else:
                self.smiles = smiles
                self.rmg_molecule = RMGMolecule(SMILES=smiles)

            self.rmg_molecule.updateMultiplicity()
            self.get_molecules()
            self.get_geometries()
        else:
            self.smiles = None
            self.rmg_molecule = None
            self.rdkit_molecule = None
            self.ase_molecule = None
            self.bonds = []
            self.angles = []
            self.torsions = []
            self.cistrans = []
            self.chiral_centers = []

    def __repr__(self):
        return '<TS "{}">'.format(self.smiles)

    def copy(self):
        """
        Return a new TS with the same reaction label and family, built from
        copies of the RMG, RDKit, pseudo-geometry and ASE molecules. The
        geometry lists are regenerated on the copy and `energy` is carried
        over.
        """
        copy_conf = TS(reaction_label=self.reaction_label,
                       reaction_family=self.reaction_family)
        copy_conf.smiles = self.smiles
        copy_conf.rmg_molecule = self.rmg_molecule.copy()
        copy_conf.rdkit_molecule = self.rdkit_molecule.__copy__()
        if self._pseudo_geometry is not None:
            copy_conf._pseudo_geometry = self._pseudo_geometry.__copy__()
        copy_conf.ase_molecule = self.ase_molecule.copy()
        copy_conf.get_geometries()
        copy_conf.energy = self.energy
        return copy_conf

    @property
    def rdkit_molecule(self):
        """
        The RDKit molecule of the TS. It is built on first access, but only
        when `distance_data` is available; otherwise the stored value (which
        may be None) is returned.
        """
        if (self._rdkit_molecule is None) and self.distance_data:
            self._rdkit_molecule = self.get_rdkit_mol(self.rmg_molecule,
                                                      self.reaction_family,
                                                      self.distance_data)[0]
        return self._rdkit_molecule

    @rdkit_molecule.setter
    def rdkit_molecule(self, value):
        # __init__, copy() and the inherited Conformer methods assign to this
        # attribute, so the property needs a setter.
        self._rdkit_molecule = value

    @property
    def ase_molecule(self):
        """
        The ASE atoms object of the TS, built with `get_ase_mol` on first
        access.
        """
        if (self._ase_molecule is None):
            self._ase_molecule = self.get_ase_mol()
        return self._ase_molecule

    @ase_molecule.setter
    def ase_molecule(self, value):
        # See the rdkit_molecule setter.
        self._ase_molecule = value

    def get_molecules(self):
        """
        Build the RDKit and ASE molecules of the TS (and the RMG molecule
        from `smiles` when it is missing).

        Overrides the Conformer version because `TS.get_rdkit_mol` returns a
        `(rdkit_molecule, bounds_matrix)` tuple instead of a bare molecule.

        :return: the tuple `(rdkit_molecule, ase_molecule)`
        """
        if not self.rmg_molecule:
            self.rmg_molecule = RMGMolecule(SMILES=self.smiles)
        self.rdkit_molecule = self.get_rdkit_mol(self.rmg_molecule,
                                                 self.reaction_family,
                                                 self.distance_data)[0]
        self.ase_molecule = self.get_ase_mol()
        return self.rdkit_molecule, self.ase_molecule

    # Name used by __init__ in earlier revisions of this class
    get_mols = get_molecules

    def get_rdkit_mol(self, rmg_molecule=None, reaction_family="H_Abstraction",
                      distance_data=None):
        """
        A method to create an rdkit geometry... slightly different than that
        of the conformer method.

        When the reaction has three labeled atoms, a copy of the molecule
        with one extra single bond between labeled atoms is stored on
        `_pseudo_geometry`. When `distance_data` is given, the molecule is
        re-embedded against a bounds matrix edited with those distances.

        :param rmg_molecule: RMG molecule to embed (defaults to
            `self.rmg_molecule`)
        :param reaction_family: reaction family used to pick the labels
        :param distance_data: optional distance data for the reaction center
        :return: the tuple `(rdkit_molecule, bm)`; `bm` is None without
            `distance_data`, and `rdkit_molecule` is None when the
            constrained embedding fails
        """
        if not rmg_molecule:
            rmg_molecule = self.rmg_molecule

        # Conformer.get_rdkit_mol() takes no arguments and works on the
        # conformer's own rmg_molecule, so a bare helper conformer is used.
        seed = Conformer()
        seed.rmg_molecule = rmg_molecule
        rdkit_molecule = Chem.RWMol(seed.get_rdkit_mol())

        labels, atom_match = self.get_labels(rmg_molecule, reaction_family)

        for i, atom in enumerate(rmg_molecule.atoms):
            assert atom.number == rdkit_molecule.GetAtomWithIdx(i).GetAtomicNum()

        if len(labels) == 3:
            rd_copy = rdkit_molecule.__copy__()
            lbl1, lbl2, lbl3 = labels
            # Add whichever of the two reaction-center bonds is missing
            if not rd_copy.GetBondBetweenAtoms(lbl1, lbl2):
                rd_copy.AddBond(lbl1, lbl2,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)
            else:
                rd_copy.AddBond(lbl2, lbl3,
                                order=rdkit.Chem.rdchem.BondType.SINGLE)
            self._pseudo_geometry = rd_copy

        logging.info("Initially embedded molecule")

        bm = None
        if distance_data:
            logging.info("Getting bounds matrix")
            bm = self.get_bounds_matrix(rdkit_molecule=rdkit_molecule)

            if len(labels) > 0:
                logging.info("Editing bounds matrix")
                bm = self.edit_matrix(rmg_molecule, bm, labels, distance_data)

            logging.info("Performing triangle smoothing on bounds matrix.")
            DistanceGeometry.DoTriangleSmoothing(bm)

            logging.info("Now attempting to embed using edited bounds matrix.")
            rdkit_molecule = self.rd_embed(
                rdkit_molecule, 10000, bm=bm, match=atom_match)[0]

        return rdkit_molecule, bm

    def get_bounds_matrix(self, rmg_molecule=None, rdkit_molecule=None):
        """
        A method to obtain the bounds matrix.

        :param rmg_molecule: RMG molecule (defaults to `self.rmg_molecule`)
        :param rdkit_molecule: RDKit molecule; generated from `rmg_molecule`
            when not given
        :return: the bounds matrix of `rdkit_molecule`, or None when the
            molecules needed to build it are not available
        """
        if not rmg_molecule:
            try:
                rmg_molecule = self.rmg_molecule
            except AttributeError:
                return None
        if not rdkit_molecule:
            try:
                # get_rdkit_mol() returns (molecule, bounds matrix)
                rdkit_molecule = self.get_rdkit_mol(
                    rmg_molecule=rmg_molecule)[0]
            except Exception:
                return None
        logging.info("before")
        bm = rdDistGeom.GetMoleculeBoundsMatrix(rdkit_molecule)
        logging.info("Got bounds matrix")

        return bm

    def set_limits(self, bm, lbl1, lbl2, value, uncertainty):
        """
        A method to set the limits of a particular distance between two atoms.

        :param bm: an array of arrays corresponding to the bounds matrix
        :param lbl1: the label of one atom
        :param lbl2: the label of another atom
        :param value: the distance from a distance data object (float)
        :param uncertainty: the uncertainty of the `value` distance (float)
        :return bm: an array of arrays corresponding to the edited bounds matrix
        """
        logging.info(
            "For atoms {0} and {1} we have a distance of: \t {2}".format(
                lbl1, lbl2, value))

        low, high = (lbl2, lbl1) if lbl1 > lbl2 else (lbl1, lbl2)
        # The entry at [larger index][smaller index] takes the lower limit,
        # the mirrored entry the upper limit.
        bm[high][low] = max(0, value - uncertainty / 2.0)
        bm[low][high] = value + uncertainty / 2.0

        return bm

    def bm_pre_edit(self, bm, sect):
        """
        Clean up some of the atom distance limits before attempting triangle
        smoothing. This ensures any edits made do not lead to unsolvable
        scenarios for the molecular embedding algorithm.

        For every pair i > j and every third atom k, the entry `bm[i, j]` is
        capped at `U(i, k) + U(k, j) - 0.1`, where `U` is read from the half
        of the matrix whose column index is the larger one.

        :param bm: the bounds matrix, indexable as `bm[i, j]`; edited in place
        :param sect: the list of atom indices belonging to one species
        :return: the edited bounds matrix
        :raises ValueError: if `sect` holds an index outside the matrix
        """
        size = len(bm)

        # `others` is not used below; removing from it only checks that every
        # index of `sect` lies inside the matrix.
        others = list(range(size))
        for idx in sect:
            others.remove(idx)

        for i in range(size):
            for j in range(i):
                for k in range(size):
                    if k == i or k == j:
                        continue
                    Uik = bm[i, k] if k > i else bm[k, i]
                    Ukj = bm[j, k] if k > j else bm[k, j]

                    maxLij = Uik + Ukj - 0.1
                    if bm[i, j] > maxLij:
                        logging.info("Changing lower limit {0} to {1}".format(
                            bm[i, j], maxLij))
                        bm[i, j] = maxLij

        return bm

    def get_labels(self, rmg_molecule, reaction_family):
        """
        A method to get the labeled atoms from a reaction.

        :param rmg_molecule: a combined rmg_molecule object
        :param reaction_family: the name of the reaction family
            (case-insensitive)
        :return labels: the sorting labels of the three reaction-center
            atoms, or an empty list when the molecule has no labeled atoms or
            the family is not one of the supported ones
        :return atomMatch: a tuple of one-element tuples holding the same
            labels, or an empty tuple
        """
        labels = []
        atomMatch = ()

        labeled_atoms = rmg_molecule.getLabeledAtoms()
        if len(labeled_atoms) == 0:
            return labels, atomMatch

        family = reaction_family.lower()
        if family in ['h_abstraction',
                      'r_addition_multiplebond',
                      'intra_h_migration']:
            keys = ("*1", "*2", "*3")
        elif family in ['disproportionation']:
            keys = ("*2", "*4", "*1")
        else:
            return labels, atomMatch

        labels = [labeled_atoms[key].sortingLabel for key in keys]
        atomMatch = tuple((label, ) for label in labels)
        return labels, atomMatch

    def edit_matrix(self, rmg_molecule, bm, labels, distance_data):
        """
        A method to edit the bounds matrix using labels and distance data.

        :param rmg_molecule: the RMG molecule of the TS
        :param bm: the bounds matrix to edit
        :param labels: the three reaction-center labels
        :param distance_data: object whose `distances` mapping provides
            'd12', 'd23' and 'd13'
        :return: the edited bounds matrix
        """
        lbl1, lbl2, lbl3 = labels

        # Atom indices of the first fragment returned by split()
        sect = [atom.sortingLabel for atom in rmg_molecule.split()[0].atoms]

        uncertainties = {'d12': 0.02, 'd13': 0.02, 'd23': 0.02}
        bm = self.set_limits(bm, lbl1, lbl2,
                             distance_data.distances['d12'],
                             uncertainties['d12'])
        bm = self.set_limits(bm, lbl2, lbl3,
                             distance_data.distances['d23'],
                             uncertainties['d23'])
        bm = self.set_limits(bm, lbl1, lbl3,
                             distance_data.distances['d13'],
                             uncertainties['d13'])

        bm = self.bm_pre_edit(bm, sect)

        return bm

    def optimize(self, rdmol, boundsMatrix=None, atomMatch=None):
        """
        Optimizes the rdmol object and determines the energy of each of the
        conformers returned by rdmol.GetConformers(). UFF is used when no
        bounds matrix or atom match is given; otherwise
        `EmbedLib.OptimizeMol` is used.

        :param rdmol: the RDKit molecule to optimize
        :param boundsMatrix: optional bounds matrix
        :param atomMatch: optional atom match
        :return rdmol, minEid (index of the lowest energy conformer)
        """
        minEid = 0
        lowestE = 9.999999e99  # start with a very high number, which would never be reached

        unconstrained = (boundsMatrix is None) or (atomMatch is None)
        for conf in rdmol.GetConformers():
            if unconstrained:
                AllChem.UFFOptimizeMolecule(rdmol, confId=conf.GetId())
                energy = AllChem.UFFGetMoleculeForceField(
                    rdmol, confId=conf.GetId()).CalcEnergy()
            else:
                _, energy = EmbedLib.OptimizeMol(rdmol, boundsMatrix,
                                                 atomMatches=atomMatch,
                                                 forceConstant=100000.0)

            if energy < lowestE:
                minEid = conf.GetId()
                lowestE = energy

        return rdmol, minEid

    def rd_embed(self, rdmol, numConfAttempts, bm=None, match=None):
        """
        This portion of the script is taken from rmgpy but adapted to work
        without defining a geometry object.

        Embed the RDKit molecule and optimize the result.

        :param rdmol: the RDKit molecule to embed
        :param numConfAttempts: number of conformers (unconstrained case) or
            maximum number of embedding attempts (constrained case)
        :param bm: optional bounds matrix
        :param match: optional atom match
        :return: `(rdmol, minEid)`, or `(None, None)` when every constrained
            embedding attempt failed
        """
        if (bm is None) or (match is None):
            AllChem.EmbedMultipleConfs(rdmol, numConfAttempts, randomSeed=1)
            rdmol, minEid = self.optimize(rdmol)
            return rdmol, minEid

        # Embed the molecule according to the bounds matrix. Built to handle
        # possible failures of some of the embedding attempts.
        rdmol.RemoveAllConformers()
        for attempt in range(numConfAttempts):
            try:
                EmbedLib.EmbedMol(rdmol, bm, atomMatch=match)
                break
            except ValueError:
                logging.info(
                    "RDKit failed to embed on attempt {0} of {1}".format(
                        attempt + 1, numConfAttempts))
            except RuntimeError:
                logging.info("RDKit failed to embed.")
        else:
            # The loop ended without a successful embedding
            logging.error("RDKit failed all attempts to embed")
            return None, None

        # RDKit currently embeds the conformers and sets the id as 0, so even
        # though multiple conformers have been generated, only 1 can be
        # called. Below the id's are resolved.
        for new_id, conf in enumerate(rdmol.GetConformers()):
            conf.SetId(new_id)

        rdmol, minEid = self.optimize(rdmol, boundsMatrix=bm, atomMatch=match)

        return rdmol, minEid

    def _pseudo_conformer(self):
        """
        Return a bare Conformer that shares this TS's RMG and ASE molecules
        but uses `_pseudo_geometry` as its RDKit molecule (falling back to
        `rdkit_molecule` when no pseudo geometry has been built).
        """
        helper = Conformer()
        helper.rmg_molecule = self.rmg_molecule
        if self._pseudo_geometry is not None:
            helper.rdkit_molecule = self._pseudo_geometry
        else:
            helper.rdkit_molecule = self.rdkit_molecule
        helper.ase_molecule = self.ase_molecule
        return helper

    def get_bonds(self):
        """
        Identify the bonds of the TS using the connectivity of the pseudo
        geometry. The result is also stored on `bonds`.
        """
        self.bonds = self._pseudo_conformer().get_bonds()
        return self.bonds

    def get_torsions(self):
        """
        Identify the torsions of the TS using the connectivity of the pseudo
        geometry. The result is also stored on `torsions`.
        """
        self.torsions = self._pseudo_conformer().get_torsions()
        return self.torsions

    def get_angles(self):
        """
        Identify the angles of the TS using the connectivity of the pseudo
        geometry. The result is also stored on `angles`.
        """
        self.angles = self._pseudo_conformer().get_angles()
        return self.angles