def fixResidueSeqNumbers(mol):
    """
    Renumber the residues of mol consecutively, starting at zero.

    The atoms are scanned in index order. Every maximal run of consecutive
    atoms that share the same residue sequence number is treated as one
    residue and receives the next free number.

    Returns a tuple (old_res_seq_nos, old_to_new_seq_no_map):
      - old_res_seq_nos: the original sequence number of every atom, in atom
        index order
      - old_to_new_seq_no_map: maps each original sequence number to the new
        number given to the first run in which it occurred
    """
    old_res_seq_nos = [
        Biomol.getResidueSequenceNumber(atom) for atom in mol.atoms
    ]
    old_to_new_seq_no_map = {}
    num_atoms = mol.getNumAtoms()

    new_seq_no = 0
    run_begin = 0

    while run_begin < num_atoms:
        old_seq_no = old_res_seq_nos[run_begin]

        # find the end (exclusive) of the run of atoms sharing old_seq_no
        run_end = run_begin
        while run_end < num_atoms and old_res_seq_nos[run_end] == old_seq_no:
            run_end += 1

        # only the first occurrence of an old number is recorded
        old_to_new_seq_no_map.setdefault(old_seq_no, new_seq_no)

        for atom_idx in range(run_begin, run_end):
            Biomol.setResidueSequenceNumber(mol.getAtom(atom_idx), new_seq_no)

        run_begin = run_end
        new_seq_no += 1

    return old_res_seq_nos, old_to_new_seq_no_map
def cdfMol_pdb(pdb, output, name):
    """
    Convert a PDB file into a CDF file plus a residue listing.

    The PDB text is read from the path `pdb`; every occurrence of 'WAT' is
    replaced by 'HOH' and every occurrence of 'HIE' by 'HIS' before parsing.
    The parsed molecule is copied, each atom of the copy gets a 3D
    coordinates array holding the atom's PDB coordinates as its single
    element, and the copy is written to `output + name + ".cdf"`.
    The IDs of all residues (see getResidueID) are written to
    `output + name + "_residue_info.txt"`.

    Input:
    pdb (string): path of the PDB file to read
    output (string): prefix prepended verbatim to the output file names
    name (string): base name of the output files

    Return:
    None, in every case. The function returns early, without writing
    anything, if the PDB reader fails to read the molecule.

    Exceptions raised while writing the CDF file are reported on stdout and
    re-raised.
    """
    initial_time = time.time()

    # Read through a context manager so the file handle is closed right away
    with open(pdb, 'r') as pdb_file:
        pdb_str = pdb_file.read().replace('WAT', 'HOH').replace('HIE', 'HIS')

    cdf_mol = Chem.BasicMolecule()
    pdb_mol = Chem.BasicMolecule()

    pdb_reader = Biomol.PDBMoleculeReader(Base.StringIOStream(pdb_str))
    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, True)

    if not pdb_reader.read(pdb_mol):
        return None

    Chem.calcImplicitHydrogenCounts(pdb_mol, False)
    Chem.perceiveHybridizationStates(pdb_mol, False)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.perceiveSSSR(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, False)
    Chem.setAromaticityFlags(pdb_mol, False)

    cdf_mol.assign(pdb_mol)

    # Give every atom a fresh coordinates array ...
    for atom in cdf_mol.atoms:
        Chem.set3DCoordinatesArray(atom, Math.Vector3DArray())

    # ... and store the PDB coordinates as its first element
    for i in range(cdf_mol.numAtoms):
        Chem.get3DCoordinatesArray(cdf_mol.getAtom(i)).addElement(
            Chem.get3DCoordinates(pdb_mol.getAtom(i)))

    tmp_output = output + name + ".cdf"

    try:
        Chem.FileCDFMolecularGraphWriter(tmp_output).write(cdf_mol)
    except Exception:
        print('> Cdf_mol writing failure.')
        raise

    residues = Biomol.ResidueList(cdf_mol)
    tmp_output = output + name + "_residue_info.txt"

    with open(tmp_output, 'w') as txt_writer:
        txt_writer.write('residue name_resid_chain\n')

        for res in residues:
            txt_writer.write('{}: \n'.format(getResidueID(res)))

    calc_time = time.time() - initial_time
    print('> Cdf and amino acid residue number list files generated in {}s'.
          format(int(calc_time)))
def _CDPLextractProteinFragments(pdb_mol, lig_three_letter_code, radius=6.0):
    '''
    PRIVATE METHOD
    Extracts the ligand and its residue environment from a protein structure.
    The molecule is first prepared with _CDPLcalcProteinProperties. \n
    Input: \n
    pdb_mol (CDPL BasicMolecule): the protein structure \n
    lig_three_letter_code (string): residue code identifying the ligand atoms \n
    radius (float): passed (as float) to Biomol.extractEnvironmentResidues \n
    Return: \n
    (CDPL Fragment, CDPL Fragment): the environment and the ligand fragment.
    If no atom carries the given residue code an error is logged and the
    environment is still extracted for the (empty) ligand fragment.
    '''
    lig = Chem.Fragment()

    _CDPLcalcProteinProperties(pdb_mol)

    # extract ligand (every atom carrying the ligand residue code is visited)
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == lig_three_letter_code:
            Biomol.extractResidueSubstructure(atom, pdb_mol, lig, False)

    if lig.numAtoms == 0:
        # Build the complete message up front: passing the code as a second
        # positional argument without a placeholder in the message does not
        # render it.
        log.error("The defined three letter code is not existing: {}".format(
            lig_three_letter_code))

    # extract environment
    env = Chem.Fragment()
    Biomol.extractEnvironmentResidues(lig, pdb_mol, env, float(radius))

    return env, lig
def _CDPLcalcProteinProperties(
        pdb_mol):  # TODO is this the right way to handle protein structures?
    '''
    PRIVATE METHOD
    Configures a CDPL BasicMolecule for a protein structure by running the
    CDPL perception/hydrogen-completion calls below on it. The molecule is
    modified in place; nothing is returned.
    Is used in the _CDPLextractProteinFragments method. \n
    Input: \n
    pdb_mol (CDPL BasicMolecule): a CDPL BasicMolecule representing the protein structure \n
    '''
    # NOTE: the calls are order dependent - keep the sequence as is.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    # implicit hydrogen counts are calculated a second time, after the
    # molecule was made hydrogen complete
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
def getBackboneAtoms(res):
    """
    Return a list with the atoms of res for which
    Biomol.isPDBBackboneAtom() is true, in their original order.
    """
    return [atom for atom in res.atoms if Biomol.isPDBBackboneAtom(atom)]
def _removeWater(mol):
    """
    Remove every atom whose residue code is 'HOH' from mol (in place).
    """
    # Collect first, remove afterwards: the atom sequence must not be
    # modified while it is being iterated.
    water_atoms = [
        atom for atom in mol.atoms if Biomol.getResidueCode(atom) == 'HOH'
    ]

    for water_atom in water_atoms:
        mol.removeAtom(mol.getAtomIndex(water_atom))
def getPh4InteractionDictionary(cdf_path, ligand_code):
    """
    Analyze the ligand/environment pharmacophore interactions of every
    conformation stored in a CDF file.

    The ligand is the residue substructure of the first atom whose residue
    code equals ligand_code. For each conformation index the environment
    residues of the ligand are extracted, pharmacophores are generated for
    ligand and environment, and their interactions are analyzed.

    Returns a dictionary mapping the conformation index to the result of
    getPh4Interactions() for that conformation, or 0 if the ligand could not
    be found.
    """
    cdf_mol = loadCDFMolecule(cdf_path)
    num_confs = Chem.getNumConformations(cdf_mol)

    ligand = Chem.Fragment()
    ligand_atom = next((atom for atom in cdf_mol.atoms
                        if Biomol.getResidueCode(atom) == ligand_code), None)

    if ligand_atom is not None:
        Biomol.extractResidueSubstructure(ligand_atom, cdf_mol, ligand, False)

    if ligand.numAtoms == 0:
        print('> Could not find ligand {}'.format(ligand_code))
        return 0

    Chem.perceiveSSSR(ligand, True)

    # working objects that are cleared and reused for every conformation
    lig_env = Chem.Fragment()
    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(True)
    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()

    result = {}

    for conf_idx in range(num_confs):
        for container in (lig_pharm, env_pharm, interactions, lig_env):
            container.clear()

        # use the coordinates of the current conformation
        coords_func = Chem.AtomConformer3DCoordinatesFunctor(conf_idx)
        pharm_gen.setAtom3DCoordinatesFunction(coords_func)

        Biomol.extractEnvironmentResidues(ligand, cdf_mol, lig_env,
                                          coords_func, 7)
        Chem.perceiveSSSR(lig_env, True)

        pharm_gen.generate(ligand, lig_pharm)
        pharm_gen.generate(lig_env, env_pharm)
        analyzer.analyze(lig_pharm, env_pharm, interactions)

        result[conf_idx] = getPh4Interactions(lig_pharm, interactions)

    return result
def readPDBFromStream(stream: Base.IOStream):
    """
    Read one molecule in PDB format from the given stream, sanitize it
    (sanitize_mol with makeHydrogenComplete=True) and return it wrapped in a
    Protein instance.

    The return value of the reader's read() call is not checked.
    """
    from Protein import Protein
    from MoleculeTools import sanitize_mol

    reader = Biomol.PDBMoleculeReader(stream)
    molecule = Chem.BasicMolecule()

    reader.read(molecule)
    sanitize_mol(molecule, makeHydrogenComplete=True)

    return Protein(molecule)
def generate_key(ftr):
    """
    Build the string key identifying a pharmacophore feature.

    The key has the form
        <type name>[<residue code>_<residue seq. no.>_<chain id>_<serial>_<serial>...]
    where the residue information is taken from the first atom of the
    feature's substructure and the serial numbers of all substructure atoms
    that have one are appended in descending natural sort order.
    """
    substructure = Pharm.getSubstructure(ftr)
    anchor_atom = substructure.atoms[0]

    prefix = '{}[{}_{}_{}'.format(ftype_names[Pharm.getType(ftr)],
                                  Biomol.getResidueCode(anchor_atom),
                                  Biomol.getResidueSequenceNumber(anchor_atom),
                                  Biomol.getChainID(anchor_atom))

    # atoms without a serial number do not contribute to the key
    serial_nos = [
        str(Biomol.getSerialNumber(atom)) for atom in substructure.atoms
        if Biomol.hasSerialNumber(atom)
    ]
    ordered_serial_nos = sorted(serial_nos, key=natural_sort_key, reverse=True)
    suffix = ''.join('_' + serial_no for serial_no in ordered_serial_nos)

    return prefix + suffix + ']'
def __init__(self, lig_feature, env_feature):
    """
    Describe the interaction between a ligand and an environment feature.

    Sets the attributes:
      interaction_type: '<ligand feature type>-<environment feature type>'
      lig_residue / env_residue: '<residue code>_<seq. no.>_<chain id>' of
          the first substructure atom of the respective feature
      lig_atom / env_atom: '<symbol>:<serial no.>' strings of all
          substructure atoms, sorted by ascending serial number
    """
    ftype_names = {
        Pharm.FeatureType.H_BOND_ACCEPTOR: 'HBA',
        Pharm.FeatureType.H_BOND_DONOR: 'HBD',
        Pharm.FeatureType.POS_IONIZABLE: 'PI',
        Pharm.FeatureType.NEG_IONIZABLE: 'NI',
        Pharm.FeatureType.AROMATIC: 'AR',
        Pharm.FeatureType.HYDROPHOBIC: 'H',
        Pharm.FeatureType.X_VOLUME: 'XV'
    }

    def describe(feature):
        # feature type name and residue label of the first substructure atom
        type_name = ftype_names[Pharm.getType(feature)]
        first_atom = Pharm.getSubstructure(feature).atoms[0]
        residue = '{}_{}_{}'.format(
            Biomol.getResidueCode(first_atom),
            Biomol.getResidueSequenceNumber(first_atom),
            Biomol.getChainID(first_atom))
        return type_name, residue

    def atom_labels(feature):
        # '<symbol>:<serial>' labels ordered by their numeric serial part
        labels = [
            '{}:{}'.format(Chem.getSymbol(atom), Biomol.getSerialNumber(atom))
            for atom in Pharm.getSubstructure(feature).atoms
        ]
        labels.sort(key=lambda label: int(label.split(':')[1]))
        return labels

    lig_type, lig_residue = describe(lig_feature)
    env_type, env_residue = describe(env_feature)

    self.interaction_type = '{}-{}'.format(lig_type, env_type)
    self.lig_residue = lig_residue
    self.env_residue = env_residue
    self.lig_atom = atom_labels(lig_feature)
    self.env_atom = atom_labels(env_feature)
def _CDPLreadFromPDBFile(pdb_file):
    '''
    PRIVATE METHOD
    Reads a pdb file and is used by the CDPLreadProteinFile method. \n
    Input: \n
    pdb_file (string): the path to the pdb file \n
    Return: \n
    (CDPL BasicMolecule): the corresponding pdb molecule, or False if the
    file could not be read (an error is logged in that case)
    '''
    ifs = Base.FileIOStream(pdb_file, 'r')
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, False
    )  #TODO Should this be there for the pdb readin? or also in the config?

    if not pdb_reader.read(pdb_mol):
        # Build the complete message up front: passing the path as a second
        # positional argument without a placeholder in the message does not
        # render it.
        log.error("COULD NOT READ PDB: {}".format(pdb_file))
        return False

    return pdb_mol
def getResidueID(res):
    """
    Return the ID string of a residue:
    '<residue code>_<sequence number>_<chain id>', or only
    '<residue code>_<sequence number>' if the chain ID is a single blank.
    """
    chain_id = Biomol.getChainID(res)
    res_id = '_'.join(
        [Biomol.getResidueCode(res),
         str(Biomol.getResidueSequenceNumber(res))])

    if chain_id == ' ':
        return res_id

    return res_id + '_' + chain_id
def filterResidues(residues, res_subset):
    """
    Delete, in place, every residue whose sequence number is not contained
    in res_subset. An empty res_subset leaves residues untouched.
    """
    if not len(res_subset):
        return

    idx = 0

    while idx < len(residues):
        if Biomol.getResidueSequenceNumber(residues[idx]) not in res_subset:
            # the following elements move up, so idx stays where it is
            del residues[idx]
        else:
            idx += 1
def removeLigands(self, keep: bool = False, removeWater: bool = True) -> None:
    """
    Removes all entities from the protein which are not an amino acid --> usually a ligand. Be careful with
    peptides and covalently bound ligands! These will face a different behaviour and might cause unexpected results!

    Water atoms (residue code "HOH") are removed individually; for any other
    residue code not contained in THREE_LETTER_AMINO_ACID_CODES the atom
    indices returned by getMoleculeFromAtom are removed.

    :param keep: Whether to keep the removed ligands stored in the ligand property or simply remove them.
    :param removeWater: If true, removes the water molecules
    :raises NotImplementedError: if a water atom is encountered while removeWater is False
    :return:
    """
    from ProteinTools import getMoleculeFromAtom

    # A set gives O(1) membership tests and removes duplicate indices
    atomsToRemoveFromProtein = set()

    for atom in self.atoms:
        index = self.getAtomIndex(atom)

        if index in atomsToRemoveFromProtein:
            continue  # already scheduled as part of a previously found ligand

        ligandCode = Biomol.getResidueCode(atom)

        if ligandCode == "HOH":
            if not removeWater:
                raise NotImplementedError(
                    "keeping water molecules is not supported")

            atomsToRemoveFromProtein.add(index)
            continue

        if ligandCode not in THREE_LETTER_AMINO_ACID_CODES:
            ligand, atomsToRemove = getMoleculeFromAtom(atom, self)
            atomsToRemoveFromProtein.update(atomsToRemove)

            if keep:
                self.ligands.append(ligand)

    # Remove from the highest index downwards so that the indices of the
    # atoms still to be removed stay valid
    for i in sorted(atomsToRemoveFromProtein, reverse=True):
        self.removeAtom(i)
def getAlphaCAtom(res):
    """
    Return a one-element list holding the first atom of res whose residue
    atom name is 'CA', or an empty list if there is no such atom.
    """
    ca_atoms = (atom for atom in res.atoms
                if Biomol.getResidueAtomName(atom) == 'CA')

    # only the first match is of interest
    return [first_ca for first_ca, _ in zip(ca_atoms, range(1))]
def setResidueSeqNumbers(mol, seq_nos):
    """
    Assign seq_nos[i] as residue sequence number to the atom with index i
    of mol, for every index of seq_nos.
    """
    for atom_idx, seq_no in enumerate(seq_nos):
        Biomol.setResidueSequenceNumber(mol.getAtom(atom_idx), seq_no)
def writePDB(path: str, protein: Chem.BasicMolecule) -> None:
    """
    Write the given molecule in PDB format to the file at path.

    :param path: Path of the file the molecule gets written to.
    :param protein: The molecule to write.
    :return:
    """
    # NOTE(review): the stream is opened with FileIOStream's default mode -
    # confirm that this default permits writing.
    stream = Base.FileIOStream(path)
    writer = Biomol.PDBMolecularGraphWriter(stream)

    try:
        writer.write(protein)
    finally:
        # close the writer even if write() raised
        writer.close()
def process():
    """
    Command line entry point: aligns the frames (conformations) of a CDF
    molecule onto its first frame.

    Arguments are taken from sys.argv:
      sys.argv[1]: input CDF-file
      sys.argv[2]: output CDF-file
      sys.argv[3]: optional file listing the residue subset (read with
                   Util.readLines / Util.toIntegerList) the alignment is
                   restricted to

    For every residue the centroid of its backbone atoms is calculated for
    each frame. Each frame with index >= 1 is then aligned with
    Math.DKabschAlgorithm against the centroids of frame 0 and the resulting
    transformation is applied to that conformation of the molecule.

    Exits with status 2 on wrong usage, if the input cannot be read or the
    output cannot be written.
    """
    if len(sys.argv) < 3:
        print >> sys.stderr, 'Usage:', sys.argv[
            0], '[input CDF-file] [output CDF-file] [[residue subset]]'
        sys.exit(2)

    print '- Processing CDF-file:', sys.argv[1], '...'

    mol = Util.loadCDFMolecule(sys.argv[1])

    if not mol:
        print '!! Could not read file'
        sys.exit(2)

    residues = Biomol.ResidueList(mol)

    print '- Num. residues:', residues.getSize()

    # optionally restrict the alignment to a subset of the residues
    if len(sys.argv) > 3:
        res_subset_ids = Util.toIntegerList(Util.readLines(sys.argv[3]))

        print '- Residue subset:', res_subset_ids

        Util.filterResidues(residues, res_subset_ids)

        print '- num residues', len(residues)

    num_confs = Chem.getNumConformations(mol)

    print '- Num. frames:', num_confs
    print '- Aligning frames...'

    # res_positions[r][f]: backbone atom centroid of residue r in frame f
    res_positions = []

    for res in residues:
        atoms = Util.getBackboneAtoms(res)
        positions = []
        i = 0

        while i < num_confs:
            positions.append(Util.calcAtomSetCentroid(atoms, i))
            i += 1

        res_positions.append(positions)

    alignment = Math.DKabschAlgorithm()
    # 3 x (number of residues) matrices, one column per residue centroid
    al_ref_positions = Math.DMatrix(3, residues.getSize())
    al_positions = Math.DMatrix(3, residues.getSize())

    # the centroids of frame 0 serve as alignment reference
    i = 0

    while i < residues.getSize():
        pos = res_positions[i][0]

        al_ref_positions.setElement(0, i, pos[0])
        al_ref_positions.setElement(1, i, pos[1])
        al_ref_positions.setElement(2, i, pos[2])
        i += 1

    # frame 0 is the reference, so the alignment starts at frame 1
    i = 1
    xform = Math.Matrix4D()

    while i < num_confs:
        j = 0

        while j < residues.getSize():
            pos = res_positions[j][i]

            al_positions.setElement(0, j, pos[0])
            al_positions.setElement(1, j, pos[1])
            al_positions.setElement(2, j, pos[2])
            j += 1

        # a frame that cannot be aligned is reported and left untransformed
        if not alignment.align(al_positions, al_ref_positions):
            print '!! Could not align frame', i
        else:
            xform.assign(alignment.getTransform())
            Chem.transformConformation(mol, i, xform)

        i += 1

    if not Util.saveCDFMolecule(sys.argv[2], mol):
        print '!! Could not write output file'
        sys.exit(2)
def process():
    """
    Command line entry point: converts an MD trajectory into a CDF file.

    Arguments are taken from sys.argv:
      sys.argv[1]: input topology-file
      sys.argv[2]: input coordinates-file
      sys.argv[3]: output CDF-file

    Topology and coordinates are loaded with MDAnalysis. For every MD atom a
    CDPL atom is created (symbol guessed from the atom name, residue
    information, PEOE charge taken from the MD charge, residue name 'WAT'
    stored as 'HOH'), the topology bonds are copied, missing O-H bonds of
    water molecules are added, and the coordinates of every trajectory time
    step are appended to the per-atom 3D coordinates arrays.

    Exits with status 2 on wrong usage or if the output cannot be written.
    """
    if len(sys.argv) < 4:
        print >> sys.stderr, 'Usage:', sys.argv[
            0], '[input topology-file] [input coordinates-file] [output CDF-file]'
        sys.exit(2)

    print >> sys.stderr, '- Processing topology-file', sys.argv[
        1], 'and coordinates-file', sys.argv[2], '...'

    u = MDAnalysis.Universe(sys.argv[1], sys.argv[2])
    cdf_mol = Chem.BasicMolecule()

    cdf_mol.reserveMemoryForAtoms(len(u.atoms))
    cdf_mol.reserveMemoryForBonds(len(u.bonds))

    print >> sys.stderr, '- Num. atoms:', len(u.atoms)
    print >> sys.stderr, '- Num. bonds:', len(u.bonds)

    num_frames = len(u.trajectory)

    print >> sys.stderr, '- Num. frames:', num_frames

    # construct atoms
    print >> sys.stderr, '- Building atoms ...'

    waters = {}  # residue id -> indices of the atoms of that water molecule
    i = 0

    for md_atom in u.atoms:
        atom = cdf_mol.addAtom()
        sym = MDAnalysis.topology.guessers.guess_atom_element(md_atom.name)

        Chem.setSymbol(atom, sym.title())
        Chem.setImplicitHydrogenCount(atom, 0)
        Biomol.setChainID(atom, md_atom.segid)

        if md_atom.resname == 'WAT':
            Biomol.setResidueCode(atom, 'HOH')
        else:
            Biomol.setResidueCode(atom, md_atom.resname)

        if Biomol.getResidueCode(atom) == 'HOH':
            waters.setdefault(md_atom.resid, []).append(i)

        Biomol.setResidueSequenceNumber(atom, int(md_atom.resid))
        Biomol.setResidueAtomName(atom, md_atom.name)

        # fix positive charge on arginin nitrogen
        if md_atom.resname == 'ARG' and md_atom.name == 'NH2':
            Chem.setFormalCharge(atom, 1)

        coords = [float(coord) for coord in md_atom.position]

        Chem.set3DCoordinates(atom, coords)

        # the per-frame coordinates are appended further below
        coords_array = Math.Vector3DArray()
        coords_array.reserve(num_frames)

        Chem.set3DCoordinatesArray(atom, coords_array)
        Chem.setPEOECharge(atom, float(md_atom.charge))
        i += 1

    Chem.setAtomTypesFromSymbols(cdf_mol, True)

    # construct bonds
    print >> sys.stderr, '- Building bonds ...'

    for md_bond in u.bonds:
        cdf_mol.addBond(int(md_bond.atoms[0].index),
                        int(md_bond.atoms[1].index))

    print >> sys.stderr, '- Building water atom bonds ...'

    for water in waters.values():
        if len(water) < 2:
            continue

        for atom_idx in water:
            oxygen = cdf_mol.atoms[atom_idx]

            if Chem.getType(oxygen) != Chem.AtomType.O:
                continue

            # Check the bond count of this water's oxygen (previously a
            # stale atom reference left over from the atom building loop
            # was tested here): nothing to do if it is already bonded.
            if oxygen.numBonds > 1:
                break

            for atom_idx2 in water:
                if Chem.getType(cdf_mol.atoms[atom_idx2]) == Chem.AtomType.H:
                    cdf_mol.addBond(atom_idx, atom_idx2)

            break

    # make sane biomolecule
    Chem.perceiveSSSR(cdf_mol, True)
    Chem.setRingFlags(cdf_mol, True)
    Chem.perceiveBondOrders(cdf_mol, True)
    Chem.perceiveHybridizationStates(cdf_mol, True)
    Chem.setAromaticityFlags(cdf_mol, True)
    Chem.calcFormalCharges(cdf_mol, True)

    # read timesteps and write cdf
    print >> sys.stderr, '- Importing coordinates ...'

    i = 0
    traj_coords = []
    atom_coords = Math.Vector3D()  # reused buffer for the current position

    # iterating the trajectory advances the positions seen via u.atoms
    for ts in u.trajectory:
        print >> sys.stderr, '- Processing time step', i, '...'

        for md_atom in u.atoms:
            del traj_coords[:]

            for coord in md_atom.position:
                traj_coords.append(float(coord))

            coords_array = Chem.get3DCoordinatesArray(
                cdf_mol.getAtom(int(md_atom.index)))

            atom_coords[0] = traj_coords[0]
            atom_coords[1] = traj_coords[1]
            atom_coords[2] = traj_coords[2]

            coords_array.addElement(atom_coords)

        i += 1

    print >> sys.stderr, '- Writing output file:'

    if not Chem.FileCDFMolecularGraphWriter(sys.argv[3]).write(cdf_mol):
        print >> sys.stderr, '!! Could not write output file'
        sys.exit(2)
def process():
    """
    Command line entry point: exports the frames (conformations) of a CDF
    molecule as line data files for visualization.

    Arguments are taken from sys.argv:
      sys.argv[1]: input CDF-file
      sys.argv[2]: output directory

    For every conformation one file '<input name>_frame_no_<i>' is written
    with pyevtk.hl.linesToVTK. It contains
      - a spline through the backbone atoms named 'C', drawn as line
        segments with atom type 0, and
      - every bond that neither involves a 'HOH' residue atom nor a hydrogen,
        drawn as two half-bond segments carrying the type of the respective
        bonded atom.
    In addition a '<input name>.pvd' file referencing all frame files is
    written via the Util.writePVD* helpers.

    Exits with status 2 on wrong usage, if the input cannot be read or a
    frame file cannot be written.
    """
    if len(sys.argv) < 3:
        print >> sys.stderr, 'Usage:', sys.argv[
            0], '[input.cdf] [output directory]'
        sys.exit(2)

    # input file name without directory and extension
    in_fname = path.splitext(path.basename(sys.argv[1]))[0]
    mol = Chem.BasicMolecule()
    cdf_reader = Chem.FileCDFMoleculeReader(sys.argv[1])

    # NOTE(review): pvd_file is not closed in this function - confirm
    # whether Util.writePVDFooter takes care of that
    pvd_file = open(path.join(sys.argv[2], in_fname + '.pvd'), 'w')

    Util.writePVDHeader(pvd_file)

    print >> sys.stderr, '- Processing CDF-file:', sys.argv[1], '...'

    if not cdf_reader.read(mol):
        print '!! Could not read file'
        sys.exit(2)

    # control points of the backbone spline: backbone atoms named 'C'
    backbone_atoms = []

    for atom in mol.atoms:
        if Biomol.isPDBBackboneAtom(atom) and Biomol.getResidueAtomName(
                atom) == 'C':
            backbone_atoms.append(atom)

    # bonds to draw: no water bonds, no bonds to hydrogens
    bond_list = []

    for bond in mol.bonds:
        if Biomol.getResidueCode(
                bond.getAtom(0)) == 'HOH' or Biomol.getResidueCode(
                    bond.getAtom(1)) == 'HOH':
            continue

        if Chem.getType(bond.getAtom(0)) == Chem.AtomType.H or Chem.getType(
                bond.getAtom(1)) == Chem.AtomType.H:
            continue

        bond_list.append(bond)

    num_confs = Chem.getNumConformations(mol)
    # 4 points per bond (two half-bond segments) plus 2 points for each of
    # the (number of spline points - 1) spline segments
    num_coords = len(bond_list) * 4 + (
        len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM - 1) * 2
    bond_ctr = Math.Vector3D()  # reused buffer for the bond center

    i = 0

    while i < num_confs:
        line_x_coords = numpy.ndarray(num_coords, numpy.float32)
        line_y_coords = numpy.ndarray(num_coords, numpy.float32)
        line_z_coords = numpy.ndarray(num_coords, numpy.float32)
        atom_types = numpy.ndarray(num_coords, numpy.uint32)

        spline_ctrl_points = numpy.ndarray((len(backbone_atoms), 3),
                                           numpy.float32)
        j = 0

        for atom in backbone_atoms:
            atom_pos = Chem.getConformer3DCoordinates(atom, i)

            spline_ctrl_points[j, 0] = atom_pos(0)
            spline_ctrl_points[j, 1] = atom_pos(1)
            spline_ctrl_points[j, 2] = atom_pos(2)
            j += 1

        # spline_pts is indexed as [coordinate axis][point index] below
        spline_pts = spline(spline_ctrl_points,
                            len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM)

        # j: index of the next output point, k: index of the spline point
        j = 0
        k = 0

        # one line segment (start and end point) per pair of neighboring
        # spline points
        while k < (len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM - 1):
            line_x_coords[j] = spline_pts[0][k]
            line_y_coords[j] = spline_pts[1][k]
            line_z_coords[j] = spline_pts[2][k]
            atom_types[j] = 0
            j += 1

            line_x_coords[j] = spline_pts[0][k + 1]
            line_y_coords[j] = spline_pts[1][k + 1]
            line_z_coords[j] = spline_pts[2][k + 1]
            atom_types[j] = 0
            j += 1
            k += 1

        for bond in bond_list:
            atom1 = bond.getAtom(0)
            atom2 = bond.getAtom(1)

            atom1_pos = Chem.getConformer3DCoordinates(atom1, i)
            atom2_pos = Chem.getConformer3DCoordinates(atom2, i)

            atom1_type = Chem.getType(atom1)
            atom2_type = Chem.getType(atom2)

            # bond center = (atom1_pos + atom2_pos) / 2
            bond_ctr.assign(atom1_pos)
            bond_ctr += atom2_pos
            bond_ctr *= 0.5

            # first half-bond: atom 1 -> bond center, typed as atom 1
            line_x_coords[j] = atom1_pos(0)
            line_y_coords[j] = atom1_pos(1)
            line_z_coords[j] = atom1_pos(2)
            atom_types[j] = atom1_type
            j += 1

            line_x_coords[j] = bond_ctr(0)
            line_y_coords[j] = bond_ctr(1)
            line_z_coords[j] = bond_ctr(2)
            atom_types[j] = atom1_type
            j += 1

            # second half-bond: bond center -> atom 2, typed as atom 2
            line_x_coords[j] = bond_ctr(0)
            line_y_coords[j] = bond_ctr(1)
            line_z_coords[j] = bond_ctr(2)
            atom_types[j] = atom2_type
            j += 1

            line_x_coords[j] = atom2_pos(0)
            line_y_coords[j] = atom2_pos(1)
            line_z_coords[j] = atom2_pos(2)
            atom_types[j] = atom2_type
            j += 1

        # shrink the arrays to the number of points actually written
        line_x_coords.resize(j)
        line_y_coords.resize(j)
        line_z_coords.resize(j)
        atom_types.resize(j)

        out_fname = in_fname + '_frame_no_' + str(i)
        out_path = path.join(sys.argv[2], out_fname)
        line_data = {'atom_type': atom_types}

        print >> sys.stderr, '- Writing structure data for frame', i, '...'

        if not pyevtk.hl.linesToVTK(out_path,
                                    line_x_coords,
                                    line_y_coords,
                                    line_z_coords,
                                    pointData=line_data):
            print '!! Could not write output file'
            sys.exit(2)

        Util.writePVDEntry(pvd_file, i, out_fname, 'vtu')

        i += 1

    Util.writePVDFooter(pvd_file)
# name1 = "1K74_l_b" # name2 = "1K74_r_b" # name1 = "1KTZ_l_b" # name2 = "1KTZ_r_b" name1 = "1MAH_l_b" name2 = "1MAH_r_b" mol1 = Protein() mol1.fromFile("structures/" + name1 + ".pdb") mol2 = Protein() mol2.fromFile("structures/" + name2 + ".pdb") mol1.prepare() mol2.prepare() Chem.makeHydrogenDeplete(mol1) Chem.makeHydrogenDeplete(mol2) Biomol.FilePDBMolecularGraphWriter("compute/" + name1 + ".pdb").write(mol1) Biomol.FilePDBMolecularGraphWriter("compute/" + name2 + ".pdb").write(mol2) # ProteinTools.writePDB("compute/" + name1 + ".pdb", mol1) # ProteinTools.writePDB("compute/" + name2 + ".pdb", mol2) print("computing molecule surface") os.system("~/EDTSurf/EDTSurf -i " + "compute/" + name1 + ".pdb" + " -h 2 -s 3") os.system("~/EDTSurf/EDTSurf -i " + "compute/" + name2 + ".pdb" + " -h 2 -s 3 ") # -p 2.0 ''' atomCoords1 = Math.Vector3DArray() atomCoords2 = Math.Vector3DArray() Chem.get3DCoordinates(mol1, atomCoords1)
def isAlphaAtom(atom):
    """
    Tell whether the residue atom name of the given atom is 'CA'.
    """
    atom_name = Biomol.getResidueAtomName(atom)

    return atom_name == 'CA'
def readPDBFromStream(stream: Base.IOStream):
    """
    Read one molecule in PDB format from the given stream and return it
    wrapped in a Protein instance.

    The return value of the reader's read() call is not checked.
    """
    reader = Biomol.PDBMoleculeReader(stream)
    molecule = Chem.BasicMolecule()

    reader.read(molecule)

    return Protein(molecule)
def generate_ph(pdb, key):
    """
    Generate the ligand/environment interaction pharmacophore (including
    exclusion volumes) for the structure stored in a PDB file.

    pdb: path of the PDB file to read
    key: not used inside this function

    Returns the interaction pharmacophore with two extra attributes:
      fv:          list of the keys (see generate_key) of all features that
                   have a substructure and are not exclusion volumes
      path_to_pdb: the pdb argument
    Returns None if the input cannot be read or the ligand is not found.

    NOTE(review): `self` is not a parameter of this function; presumably it
    is defined inside a method and closes over that method's `self` -
    confirm. The ligand residue code is read from
    self.ligand_3_letter_code and every feature key is added to
    self.unique_feature_vector.
    """
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = self.ligand_3_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    print '- Reading input: ', pdb, ' ...'

    if not pdb_reader.read(pdb_mol):
        print '!! Could not read input molecule'
        return

    print '- Processing macromolecule', pdb, ' ...'

    # remove all bonds that involve a metal atom; the index is only advanced
    # when no bond was removed since the following bonds move up
    i = 0

    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)

        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # NOTE: the following calls are order dependent - keep the sequence
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)

    ligand = Chem.Fragment()

    print '- Extracting ligand ', tlc, ' ...'

    # the ligand is the residue of the first atom carrying the ligand code
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print '!! Could not find ligand', tlc, 'in input file'
        return

    Chem.perceiveSSSR(ligand, True)

    lig_env = Chem.Fragment()

    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)

    print '- Constructing pharmacophore ...'

    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(False)

    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()

    analyzer.analyze(lig_pharm, env_pharm, interactions)

    #------------------------- XVOLS

    # NOTE(review): judging by the variable names the last argument of
    # getFeatures selects environment (False) or ligand/core (True)
    # features of the mapping - confirm against the CDPL documentation
    int_env_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_env_ftrs, interactions, False)
    int_core_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_core_ftrs, interactions, True)
    int_pharm = Pharm.BasicPharmacophore(int_core_ftrs)

    # H-bond donor/acceptor features get tolerance 1.0, all others 1.5
    for ftr in int_env_ftrs:
        if Pharm.getType(
                ftr
        ) == Pharm.FeatureType.H_BOND_DONOR or Pharm.getType(
                ftr) == Pharm.FeatureType.H_BOND_ACCEPTOR:
            Pharm.setTolerance(ftr, 1.0)
        else:
            Pharm.setTolerance(ftr, 1.5)

    # exclusion volumes derived from the interacting environment features
    Pharm.createExclusionVolumes(int_pharm, int_env_ftrs, 0.0, 0.1, False)

    int_env_ftr_atoms = Chem.Fragment()
    Pharm.getFeatureAtoms(int_env_ftrs, int_env_ftr_atoms)

    int_residue_atoms = Chem.Fragment()
    Biomol.extractResidueSubstructures(int_env_ftr_atoms, lig_env,
                                       int_residue_atoms, True)
    Chem.makeHydrogenDeplete(int_residue_atoms)

    def isAlphaAtom(atom):
        # true for atoms whose residue atom name is 'CA'
        return Biomol.getResidueAtomName(atom) == 'CA'

    # keep only the 'CA' atoms and create exclusion volumes for them
    Chem.removeAtomsIfNot(int_residue_atoms, isAlphaAtom)
    Pharm.createExclusionVolumes(int_pharm, int_residue_atoms,
                                 Chem.Atom3DCoordinatesFunctor(), 1.0, 2.0,
                                 False)

    # collect the keys of all features that have a substructure and are not
    # exclusion volumes
    features_in_ph = []

    for int_ftr in int_pharm:
        if Pharm.hasSubstructure(int_ftr) == False:
            continue
        elif ftype_names[Pharm.getType(int_ftr)] == 'XV':
            continue

        feature_id = generate_key(int_ftr)

        features_in_ph.append(str(feature_id))
        self.unique_feature_vector.add(str(feature_id))

    int_pharm.fv = features_in_ph
    int_pharm.path_to_pdb = pdb

    return int_pharm
def generate_ph(pdb, args, df_constructor, ts):
    """
    Generate the ligand and environment pharmacophores for the structure
    stored in a PDB file and record their interactions.

    pdb:            path of the PDB file to read
    args:           object providing the ligand residue code as
                    args.ligand_three_letter_code
    df_constructor: passed to and returned (updated) by outputInteractions
    ts:             only referenced by the commented-out debug output

    Returns the tuple (df_constructor, interaction_at_ts) produced by
    outputInteractions.
    NOTE: returns None (not a tuple) if the input cannot be read or the
    ligand is not found.
    """
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = args.ligand_three_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    print '- Reading input: ', pdb, ' ...'

    if not pdb_reader.read(pdb_mol):
        print '!! Could not read input molecule'
        return

    print '- Processing macromolecule', pdb, ' ...'

    # remove all bonds that involve a metal atom; the index is only advanced
    # when no bond was removed since the following bonds move up
    i = 0

    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)

        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # reset the implicit hydrogen count of every atom before it gets
    # calculated below
    for a in pdb_mol.atoms:
        Chem.setImplicitHydrogenCount(a, 0)

    # NOTE: the following calls are order dependent - keep the sequence
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    Chem.calcFormalCharges(pdb_mol, True)

    ligand = Chem.Fragment()

    print '- Extracting ligand ', tlc, ' ...'

    # the ligand is the residue of the first atom carrying the ligand code
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print '!! Could not find ligand', tlc, 'in input file'
        return

    Chem.perceiveSSSR(ligand, True)

    lig_env = Chem.Fragment()

    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)

    print '- Constructing pharmacophore ...'

    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(True)

    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)

    #Pharm.FilePMLFeatureContainerWriter('./test/lig_ph_' + str(ts) + '.pml').write(lig_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()

    analyzer.analyze(lig_pharm, env_pharm, interactions)

    df_constructor, interaction_at_ts = outputInteractions(
        lig_pharm, env_pharm, interactions, df_constructor)

    #Chem.FileSDFMolecularGraphWriter('./test/ligand_' + str(ts) + '.sdf').write(ligand)

    return df_constructor, interaction_at_ts