def find_equivelant_torsions(mapped_mol, restricted=False, central_bonds=None):
    """
    Find all torsions around each requested central bond.

    Parameters
    ----------
    mapped_mol: oemol
        Must contain map indices.
    restricted: bool, optional, default False
        If True, torsions matching the restricted SMARTS pattern
        ('[*]~[C,c]=,@[C,c]~[*]') are also collected.
    central_bonds: list of tuple of ints, optional, default None
        If provided, only torsions around those central bonds are returned.
        A single bond that is not wrapped in a list is wrapped automatically.
        If None (or empty), every central bond found in the molecule is used.

    Returns
    -------
    eq_torsions: dict
        Maps each central bond to the list of equivalent torsions around it.
        Torsions are 4-tuples of (map index - 1); a torsion is attached to a
        bond when its two middle atoms match the bond in either orientation.

    Raises
    ------
    ValueError
        If the molecule has no atom map.
    """
    from openeye import oechem

    mol = oechem.OEMol(mapped_mol)
    if not has_atom_map(mol):
        raise ValueError("OEMol must have map indices")

    # Terminal torsions come from a SMARTS search, the rest from OpenEye's
    # own torsion enumeration.
    hydrogen_capped = _find_torsions_from_smarts(mol, '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]')
    enumerated = [[t.a, t.b, t.c, t.d] for t in oechem.OEGetTorsions(mapped_mol)]
    candidates = hydrogen_capped + enumerated
    if restricted:
        candidates = candidates + _find_torsions_from_smarts(mol, '[*]~[C,c]=,@[C,c]~[*]')

    # Translate atoms to zero-based map indices.
    tor_idx = [
        tuple(atoms[k].GetMapIdx() - 1 for k in range(4))
        for atoms in candidates
    ]

    if central_bonds and not isinstance(central_bonds, list):
        central_bonds = [central_bonds]
    if not central_bonds:
        central_bonds = {(t[1], t[2]) for t in tor_idx}

    eq_torsions = {}
    for bond in central_bonds:
        eq_torsions[bond] = [
            t for t in tor_idx
            if bond == (t[1], t[2]) or bond == (t[2], t[1])
        ]
    return eq_torsions
def GetAdjacentTorsions(mol, refTorsion):
    '''
    Returns the torsions whose minimum path length to the reference
    torsion is at most 1 (i.e. 0 or 1).
    Only torsions around acyclic single bonds are considered. Torsions
    whose a-b or c-d bond is a triple bond (e.g. nitrile), torsions ending
    in a hydrogen bonded to nitrogen, and torsions sharing a central bond
    with the reference or with an already accepted torsion are skipped.
    @param mol: OEGraphMol
    @param refTorsion: OETorsion
    @return: list[OETorsion]
    '''
    PATH_LENGTH_THRESHOLD = 1

    def central_bond_keys(torsion):
        # Both orientations of the central bond, as "i_j" strings.
        left = str(torsion.b.GetIdx())
        right = str(torsion.c.GetIdx())
        return left + "_" + right, right + "_" + left

    claimed_bonds = set(central_bond_keys(refTorsion))
    acyclic_single = oechem.OEAndBond(oechem.OEHasOrder(1),
                                      oechem.OENotBond(oechem.OEBondIsInRing()))

    neighbours = []
    for candidate in oechem.OEGetTorsions(mol, acyclic_single):
        # skip nitrile
        outer_orders = (candidate.a.GetBond(candidate.b).GetOrder(),
                        candidate.c.GetBond(candidate.d).GetOrder())
        if 3 in outer_orders:
            continue

        # skip torsions involving terminal -N-H
        if candidate.a.IsHydrogen() and candidate.b.IsNitrogen():
            continue
        if candidate.d.IsHydrogen() and candidate.c.IsNitrogen():
            continue

        forward, backward = central_bond_keys(candidate)
        if forward in claimed_bonds or backward in claimed_bonds:
            continue

        distance = TorsionGenerator.GetMinPathLength(refTorsion, candidate)
        if distance <= PATH_LENGTH_THRESHOLD:
            neighbours.append(candidate)
            claimed_bonds.update((forward, backward))

    return neighbours
def get_torsion(mol, rot_bond):
    """
    Find torsion in fragment that corresponds to same rotatable bond in parent molecule

    Parameters
    ----------
    mol: OEMol
        Molecule with map indices to the parent molecule.
    rot_bond: sequence of two ints
        (mapidx_1, mapidx_2) of the rotatable bond. Either orientation is
        accepted, and any two-element sequence (tuple, list, ...) works.

    Returns
    -------
    dihedral: list of int
        The four map indices of the selected torsion, each minus one.

    Raises
    ------
    ValueError
        If no torsion has ``rot_bond`` as its central bond.
    """
    # Central bonds below are tuples; a list never compares equal to a tuple,
    # so normalise the input before looking it up.
    wanted = tuple(rot_bond)

    tors = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(mol)]
    filtered_torsions = torsions.one_torsion_per_rotatable_bond(tors)
    mapped_tors = [[atom.GetMapIdx() for atom in t] for t in filtered_torsions]
    cbs = [(t[1], t[2]) for t in mapped_tors]

    # Try the bond as given first, then the flipped orientation.
    for candidate in (wanted, wanted[::-1]):
        if candidate in cbs:
            return [d - 1 for d in mapped_tors[cbs.index(candidate)]]

    raise ValueError(
        "No torsion found around rotatable bond {}".format(wanted))
def find_torsion_around_bond(molecule, bond):
    """
    Find the torsion around a given bond

    Parameters
    ----------
    molecule : molecule with atom maps
    bond : sequence of two ints
        Map indices of the bond atoms. Either orientation is accepted, and
        any two-element sequence (tuple, list, ...) works.

    Returns
    -------
    list of 4 atom map idx (-1)

    Raises
    ------
    ValueError
        If the molecule has no atom map, or if no torsion has ``bond`` as its
        central bond.

    Note:
    This returns the map indices of the torsion -1, not the atom indices.
    """
    from openeye import oechem

    if not has_atom_map(molecule):
        raise ValueError("Molecule must have atom maps")

    # Central bonds below are tuples; a list never compares equal to a tuple,
    # so normalise the input before looking it up.
    wanted = tuple(bond)

    terminal_smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
    terminal_torsions = _find_torsions_from_smarts(molecule, terminal_smarts)
    mid_torsions = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(molecule)]
    all_torsions = terminal_torsions + mid_torsions

    tors = one_torsion_per_rotatable_bond(all_torsions)
    tor_idx = [tuple(atom.GetMapIdx() for atom in tor) for tor in tors]
    central_bonds = [(tor[1], tor[2]) for tor in tor_idx]

    # Try the bond as given first, then the flipped orientation.
    for candidate in (wanted, wanted[::-1]):
        if candidate in central_bonds:
            return [i - 1 for i in tor_idx[central_bonds.index(candidate)]]

    raise ValueError("No torsion found around bond {}".format(wanted))
def find_torsions(molecule, restricted=True, terminal=True):
    """
    Collect the map indices of the torsions that need to be driven.

    Parameters
    ----------
    molecule : OEMol
        Atoms should be tagged with map indices. If they are not, a new
        mapped molecule is generated from a tagged SMILES and warnings are
        logged, since the ordering may differ from an existing tagged SMARTS.
    restricted: bool, optional, default True
        If True, also record torsions matching the restricted SMARTS pattern
        (double bonds / ring bonds between carbons).
    terminal: bool, optional, default True
        If True, also record terminal torsions (ending in a hydrogen).

    Returns
    -------
    needed_torsion_scans: dict
        Has the keys 'internal', 'terminal' and 'restricted'. Each maps to a
        dict of 'torsion_<n>' -> 4-tuple of (map index - 1) of torsion atoms.

    Raises
    ------
    Warning
        If the same torsion tuple appears more than once among the internal
        and terminal torsions.
    """
    # ToDo: Get rid of equivalent torsions. Ex H-C-C-C and C-C-C-H.
    from openeye import oechem

    if not has_atom_map(molecule):
        utils.logger().warning('Molecule does not have atom map. A new map will be generated. You might need a new tagged SMARTS if the ordering was changed')
        tagged_smiles = mol_to_smiles(molecule, isomeric=True, mapped=True, explicit_hydrogen=True)
        # Generate new molecule with tags
        molecule = chemi.smiles_to_oemol(tagged_smiles)
        utils.logger().warning('If you already have a tagged SMARTS, compare it with the new one to ensure the ordering did not change')
        utils.logger().warning('The new tagged SMARTS is: {}'.format(tagged_smiles))
        # ToDo: save the new tagged SMILES somewhere. Maybe return it?

    needed_torsion_scans = {'internal': {}, 'terminal': {}, 'restricted': {}}
    mol = oechem.OEMol(molecule)

    def _store(category, atom_lists):
        # Keep one torsion per rotatable bond and record its zero-based map indices.
        if len(atom_lists) > 0:
            bucket = needed_torsion_scans[category]
            for position, atoms in enumerate(one_torsion_per_rotatable_bond(atom_lists)):
                bucket['torsion_{}'.format(str(position))] = tuple(
                    atoms[k].GetMapIdx() - 1 for k in range(4))

    if restricted:
        # This should capture double bonds (not capturing rings because OpenEye does not
        # generate skewed conformations. ToDo: use scan in geometric or something else to get this done.
        _store('restricted',
               _find_torsions_from_smarts(molecule=mol, smarts='[*]~[C,c]=,@[C,c]~[*]'))

    if terminal:
        # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
        _store('terminal',
               _find_torsions_from_smarts(molecule=mol, smarts='[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'))

    _store('internal', [[t.a, t.b, t.c, t.d] for t in oechem.OEGetTorsions(mol)])

    # Check that there are no duplicate torsions in mid and h_torsions
    combined = list(needed_torsion_scans['internal'].values())
    combined = combined + list(needed_torsion_scans['terminal'].values())
    if len(set(combined)) != len(combined):
        raise Warning("There is a torsion defined in both mid and terminal torsions. This should not happen. Check "
                      "your molecule and the atom mapping")

    return needed_torsion_scans
def get_canonical_torsions(mol):
    '''
    Return one rotor torsion per central bond, in canonical order.
    Canonical ordering is based on atom indices in a molecule rebuilt
    from the SMILES of the input molecule:
    1. heavy atoms are numbered with temporary map indices and the
       molecule is written to SMILES and parsed back
    2. each heavy atom is tagged with the index of its counterpart in
       the re-parsed molecule
    3. hydrogen-free rotor torsions are sorted by
       (b+c, min(b, c), max(b, c), min(a, d), max(a, d)) of those tags
    4. the first torsion seen for each central bond is kept
    Map indices of heavy atoms are reset to 0 and the temporary tag is
    removed before returning. If tagging fails, the error is printed and
    None is returned.
    @param mol: OEGraphMol
    @return: list of torsions, or None on failure
    '''
    CANONICAL_IDX_TAG = 'can_idx'

    def tag_with_canonical_idx(target):
        for atom in target.GetAtoms():
            atom.SetMapIdx(0)
        for position, heavy in enumerate(target.GetAtoms(oechem.OEIsHeavy()), start=1):
            heavy.SetMapIdx(position)

        smiles = oechem.OEMolToSmiles(target)
        rebuilt = oechem.OEGraphMol()
        oechem.OESmilesToMol(rebuilt, smiles)

        # Map indices survive the SMILES round trip and link the two molecules.
        for rebuilt_atom in rebuilt.GetAtoms(oechem.OEIsHeavy()):
            twin = target.GetAtom(oechem.OEHasMapIdx(rebuilt_atom.GetMapIdx()))
            twin.SetData(CANONICAL_IDX_TAG, rebuilt_atom.GetIdx())

    try:
        tag_with_canonical_idx(mol)
    except Exception as e:
        print('Error GetCanonicalizedTorsions. ', e)
        return None

    ranked = []
    for torsion in oechem.OEGetTorsions(mol, oechem.OEIsRotor()):
        corners = (torsion.a, torsion.b, torsion.c, torsion.d)
        if any(atom.IsHydrogen() for atom in corners):
            continue

        tag_a, tag_b, tag_c, tag_d = (atom.GetData(CANONICAL_IDX_TAG) for atom in corners)
        sort_key = (tag_b + tag_c,
                    min(tag_b, tag_c), max(tag_b, tag_c),
                    min(tag_a, tag_d), max(tag_a, tag_d))
        ranked.append((sort_key, torsion))

    # sort (stable, on the numeric key only)
    ranked.sort(key=operator.itemgetter(0))

    used_bonds = set()
    unique_torsions = []
    for _, torsion in ranked:
        bond = mol.GetBond(torsion.b, torsion.c)
        if bond is None or bond.GetIdx() in used_bonds:
            continue
        unique_torsions.append(torsion)
        used_bonds.add(bond.GetIdx())

    # revert mol to original state
    for atom in mol.GetAtoms(oechem.OEIsHeavy()):
        atom.SetMapIdx(0)
        atom.DeleteData(CANONICAL_IDX_TAG)

    return unique_torsions
def GetTorsions(mol):
    '''
    Goes through each rotatable bond in the molecule
    and extracts a fragment molecule around its torsion atoms (a-b-c-d).

    Original author's notes on what the fragment keeps (the helper
    methods implementing these rules are not visible here):
    Core torsion atoms are extended by one bond
    If core or extended atoms are part of a ring,
    then entire ring is kept
    Keep ortho substitution
    Keep functional groups that have at least one atom overlap
    with the core/extended torsion atoms
    Functional group inclusion criteria:
    - <= 5 heavy atoms
    - must contain at least one hetero atom
    - non-ring
    Add methyl cap if bond involving hetero atom is broken

    Side effects on the input molecule: hybridization is assigned, every
    atom gets an "idx" data tag (atom index + 1), and atom map indices
    are overwritten during processing and set to 0 before returning.

    @param mol: OEGraphMol
    @type mol: OEGraphMol
    @return: list[OEGraphMol]
    '''
    oechem.OEAssignHybridization(mol)
    funcGrps = TorsionGenerator.GetFuncGroups(mol)
    # Tracks the central bonds already turned into a fragment.
    includedTorsions = oechem.OEAtomBondSet()
    torsionMols = []
    # Remember each atom's (1-based) index in the parent molecule.
    for atom in mol.GetAtoms():
        atom.SetData("idx", atom.GetIdx() + 1)

    # Prefer canonically ordered torsions; fall back to plain rotor
    # torsions when canonicalisation reports failure (returns None).
    torsions = get_canonical_torsions(mol)
    if torsions is None:
        torsions = oechem.OEGetTorsions(mol, oechem.OEIsRotor())

    for torsion in torsions:
        # Only torsions made of four heavy atoms are used.
        if torsion.a.IsHydrogen() or torsion.b.IsHydrogen() or \
                torsion.c.IsHydrogen() or torsion.d.IsHydrogen():
            continue

        # One fragment per central bond.
        torsion_bond = mol.GetBond(torsion.b, torsion.c)
        if includedTorsions.HasBond(torsion_bond):
            continue

        # revert map idx to zero in original mol
        for atom in mol.GetAtoms():
            atom.SetMapIdx(0)

        includedTorsions.AddBond(torsion_bond)

        torsionSet = oechem.OEAtomBondSet(mol.GetBonds())
        torsionSet.AddAtoms([torsion.a, torsion.b, torsion.c, torsion.d])
        # Map idx 1 marks the atoms in the set at this point (core torsion atoms).
        for atom in torsionSet.GetAtoms():
            atom.SetMapIdx(1)

        # extend core torsion atoms by one bond
        nbrs = TorsionGenerator.GetNbrs(torsionSet)
        torsionSet.AddAtoms(nbrs)

        # include ring atoms
        ringAtoms = TorsionGenerator.GetSameRingAtoms(mol, torsionSet)
        torsionSet.AddAtoms(ringAtoms)

        # Map idx 2 marks everything added after the core atoms.
        for atom in torsionSet.GetAtoms():
            if not atom.GetMapIdx() == 1:
                atom.SetMapIdx(2)

        # add functional groups that overlap with torsion set
        TorsionGenerator.AddFuncGroupAtoms(funcGrps, torsionSet)

        # add relevant ring atoms (ortho substituents and ring H)
        TorsionGenerator.AddRelevantRingAtoms(mol, torsion, torsionSet)

        # special treatment for C=O: for every sp2 oxygen in the set, pull
        # in the heavy neighbours of its in-set neighbour atoms
        for atom in torsionSet.GetAtoms(
                oechem.OEAndAtom(
                    oechem.OEIsOxygen(),
                    oechem.OEIsAtomHybridization(
                        oechem.OEHybridization_sp2))):
            for nbr in atom.GetAtoms():
                if torsionSet.HasAtom(nbr):
                    for nbr2 in nbr.GetAtoms(oechem.OEIsHeavy()):
                        if not torsionSet.HasAtom(nbr2):
                            nbr2.SetMapIdx(2)
                            torsionSet.AddAtom(nbr2)

        # mark bridging atom and cap if needed
        # NOTE(review): presumably MarkBridgingAtoms tags the atoms to be
        # capped with BRIDGE_ATOM_IDX as their map idx - confirm in helper.
        BRIDGE_ATOM_IDX = 4
        TorsionGenerator.MarkBridgingAtoms(BRIDGE_ATOM_IDX, mol, torsionSet)

        # Tag the four torsion atoms with distinct map indices so they can
        # be identified in the extracted fragment.
        A_IDX = 11
        B_IDX = 12
        C_IDX = 13
        D_IDX = 14
        torsion.a.SetMapIdx(A_IDX)
        torsion.b.SetMapIdx(B_IDX)
        torsion.c.SetMapIdx(C_IDX)
        torsion.d.SetMapIdx(D_IDX)

        # Copy the selected atoms/bonds into a standalone fragment molecule.
        torsionMol = oechem.OEGraphMol()
        oechem.OESubsetMol(torsionMol, mol, torsionSet, True)
        torsionMol.Sweep()
        torsionMols.append(torsionMol)

        # change bridge atom to Carbon and fill its valence up to 4 with
        # implicit hydrogens
        for atom in torsionMol.GetAtoms(
                oechem.OEHasMapIdx(BRIDGE_ATOM_IDX)):
            atom.SetAtomicNum(oechem.OEElemNo_C)
            explicit_valence = atom.GetExplicitValence()
            if explicit_valence < 4:
                atom.SetImplicitHCount(4 - explicit_valence)

        TorsionGenerator.SetSDData(A_IDX, B_IDX, C_IDX, D_IDX, torsion, torsionMol)

        # set map idx to zero in torsion mol
        for atom in torsionMol.GetAtoms():
            atom.SetMapIdx(0)

    # revert map idx to zero in original mol
    for atom in mol.GetAtoms():
        atom.SetMapIdx(0)

    return torsionMols
def generate_torsions(inp_mol, output_path, interval, base_name=None, tar=True):
    """
    Generate structures for a 1D torsion drive on all torsions in a molecule.

    This function takes a 3D molecule (pdb, mol2 or sd file) and writes one
    PDB file per torsion and per angle under
    ``<output_path>/<torsion name>/<angle>/``. This function uses OpenEye.

    Parameters
    ----------
    inp_mol : OEMol
        molecule to generate 1D torsion scans
    output_path: str
        path to output file directory
    interval: int
        angle (in degrees) of interval for torsion drive
    base_name: str
        base name for file. Default is None. If default, use title in OEMol
        for base name
    tar: bool
        If true, will compress output into ``<output_path>.tar.gz``. The
        archive contains the directory named ``base_name`` located next to
        ``output_path`` (i.e. in the parent directory of ``output_path``).
        The process working directory is left unchanged.

    Notes
    -----
    If the molecule has no torsions at all, a warning is issued and nothing
    is written.
    """
    if not base_name:
        base_name = inp_mol.GetTitle()

    mid_tors = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(inp_mol)]

    # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
    smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        warnings.warn('OEParseSmarts failed')
    ss = oechem.OESubSearch(qmol)
    mol = oechem.OEMol(inp_mol)
    oechem.OEPrepareSearch(mol, ss)
    unique = True
    h_tors = [[ma.target for ma in match.GetAtoms()]
              for match in ss.Match(mol, unique)]

    # Combine middle and terminal torsions
    all_tors = mid_tors + h_tors
    if not all_tors:
        # Nothing to drive; the selection below needs at least one torsion.
        warnings.warn('No torsions found in molecule; nothing to generate')
        return

    # Sort all_tors so that it's grouped by central bond
    # NOTE(review): the sort key is only the index of the third atom, so
    # torsions sharing that atom but not the second one end up interleaved.
    central_bonds = np.zeros((len(all_tors), 3), dtype=int)
    for i, tor in enumerate(all_tors):
        central_bonds[i] = (i, tor[1].GetIdx(), tor[2].GetIdx())
    grouped = central_bonds[central_bonds[:, 2].argsort()]
    sorted_tors = [all_tors[i] for i in grouped[:, 0]]

    # Keep only one torsion per rotatable bond: within a run of torsions
    # sharing the same central atoms, keep the one whose end atoms have the
    # largest summed atomic number.
    tors = []
    best_tor = None
    best_tor_order = None
    for tor in sorted_tors:
        logger().info("Idxs: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom Numbers: {} {} {} {}".format(
            tor[0].GetAtomicNum(), tor[1].GetAtomicNum(), tor[2].GetAtomicNum(),
            tor[3].GetAtomicNum()))
        starts_new_bond = (
            best_tor is None
            or tor[1].GetIdx() != best_tor[1].GetIdx()
            or tor[2].GetIdx() != best_tor[2].GetIdx()
        )
        if starts_new_bond:
            if best_tor is not None:
                logger().info("Adding to list: {} {} {} {}".format(
                    best_tor[0].GetIdx(), best_tor[1].GetIdx(),
                    best_tor[2].GetIdx(), best_tor[3].GetIdx()))
                tors.append(best_tor)
            best_tor = tor
            best_tor_order = tor[0].GetAtomicNum() + tor[3].GetAtomicNum()
            logger().info(
                "new_tor with central bond across atoms: {} {}".format(
                    tor[1].GetIdx(), tor[2].GetIdx()))
        else:
            logger().info("Not a new_tor but now with end atoms: {} {}".format(
                tor[0].GetIdx(), tor[3].GetIdx()))
            tor_order = tor[0].GetAtomicNum() + tor[3].GetAtomicNum()
            if tor_order > best_tor_order:
                best_tor = tor
                best_tor_order = tor_order
    logger().info("Adding to list: {} {} {} {}".format(best_tor[0].GetIdx(), best_tor[1].GetIdx(), best_tor[2].GetIdx(), best_tor[3].GetIdx()))
    tors.append(best_tor)

    logger().info("List of torsion to drive:")
    for tor in tors:
        logger().info("Idx: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom numbers: {} {} {} {}".format(
            tor[0].GetAtomicNum(), tor[1].GetAtomicNum(), tor[2].GetAtomicNum(),
            tor[3].GetAtomicNum()))

    conf = mol.GetConfs().next()
    coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
    conf.GetCoords(coords)
    # Check if coordinates are not zero
    if all(coords[i] == 0 for i in range(len(coords))):
        # Generate new coordinates.
        mol2 = generate_conformers(mol, max_confs=1)
        conf = mol2.GetConfs().next()
        coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
        conf.GetCoords(coords)
        mol2.DeleteConfs()
    # Start from a molecule without conformers; one is added per angle below.
    mol.DeleteConfs()

    for tor in tors:
        tor_name = '_'.join(str(atom.GetIdx() + 1) for atom in tor[:4])
        folder = os.path.join(output_path, tor_name)
        try:
            os.makedirs(folder)
        except FileExistsError:
            logger().info("Overwriting existing directory {}".format(tor_name))
        for angle in range(0, 360, interval):
            angle_folder = os.path.join(folder, str(angle))
            try:
                os.mkdir(angle_folder)
            except FileExistsError:
                logger().info(
                    "Overwriting existing directory {}".format(tor_name))
            newconf = mol.NewConf(coords)
            oechem.OESetTorsion(newconf, tor[0], tor[1], tor[2], tor[3], radians(angle))
            pdb = oechem.oemolostream('{}/{}_{}_{}.pdb'.format(
                angle_folder, base_name, tor_name, angle))
            try:
                oechem.OEWritePDBFile(pdb, newconf)
            finally:
                # Close explicitly so the file is flushed and the handle released.
                pdb.close()

    if tar:
        # tar archive output; the directory is addressed by absolute path so
        # the process working directory does not have to be changed.
        parent_dir = os.path.dirname(os.path.realpath(output_path))
        with tarfile.open('{}.tar.gz'.format(output_path), mode='w:gz') as out:
            out.add(os.path.join(parent_dir, '{}'.format(base_name)),
                    arcname='{}'.format(base_name))