def __iter__(self):
    """Iterate over the molecules delivered by the underlying reader.

    Yields the sanitized molecule when ``self.properties`` is None,
    otherwise a ``(sanitized molecule, extracted properties)`` pair.
    A record whose processing raises ``IOError`` is reported as a bare
    ``None`` in both modes. Iteration ends when the reader reports no
    further record or once ``self.nr_mols`` records have been handled.
    """
    # The mode is decided once, before the first record is read.
    with_properties = self.properties is not None
    handled = 0

    while True:
        mol = Chem.BasicMolecule()
        try:
            if not self.r.read(mol):
                break
            if with_properties:
                read_properties = self._extract_properties_from_mol(mol)
                yield sanitize_mol(mol), read_properties
            else:
                yield sanitize_mol(mol)
        except IOError:
            # Failed records still count towards the nr_mols limit.
            yield None

        handled += 1
        if handled == self.nr_mols:
            break
def cdfMol_pdb(pdb, output, name):
    """Convert a PDB file into a CDF file plus a residue listing.

    The PDB text is loaded with residue names ``WAT`` -> ``HOH`` and
    ``HIE`` -> ``HIS`` substituted, parsed into a molecule, and written to
    ``output + name + ".cdf"``. Every atom of the written molecule gets a
    3D-coordinates array holding the coordinates read from the PDB file.
    A second file, ``output + name + "_residue_info.txt"``, receives one
    line per residue as produced by ``getResidueID``.

    :param pdb: Path of the PDB file to read.
    :param output: Prefix (typically a directory ending in a separator)
        that is concatenated directly with ``name``.
    :param name: Base name of the generated files.
    :return: ``None``; also returned early when the PDB cannot be parsed.
    :raises Exception: Any error raised while writing the CDF file is
        re-raised after a short notice has been printed.
    """
    initial_time = time.time()

    # Use a context manager so the input handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(pdb, 'r') as pdb_file:
        pdb_str = pdb_file.read().replace('WAT', 'HOH').replace('HIE', 'HIS')

    pdb_mol = Chem.BasicMolecule()
    pdb_reader = Biomol.PDBMoleculeReader(Base.StringIOStream(pdb_str))
    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, True)

    if not pdb_reader.read(pdb_mol):
        return None

    Chem.calcImplicitHydrogenCounts(pdb_mol, False)
    Chem.perceiveHybridizationStates(pdb_mol, False)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.perceiveSSSR(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, False)
    Chem.setAromaticityFlags(pdb_mol, False)

    cdf_mol = Chem.BasicMolecule()
    cdf_mol.assign(pdb_mol)

    # Give every atom a fresh coordinates array, then store the PDB
    # coordinates as its single entry.
    for atom in cdf_mol.atoms:
        Chem.set3DCoordinatesArray(atom, Math.Vector3DArray())

    for idx in range(cdf_mol.numAtoms):
        Chem.get3DCoordinatesArray(cdf_mol.getAtom(idx)).addElement(
            Chem.get3DCoordinates(pdb_mol.getAtom(idx)))

    cdf_path = output + name + ".cdf"
    try:
        Chem.FileCDFMolecularGraphWriter(cdf_path).write(cdf_mol)
    except Exception:
        # Report and propagate; interpreter-exit signals are not intercepted.
        print('> Cdf_mol writing failure.')
        raise

    residues = Biomol.ResidueList(cdf_mol)
    residue_path = output + name + "_residue_info.txt"
    with open(residue_path, 'w') as txt_writer:
        txt_writer.write('residue name_resid_chain\n')
        for res in residues:
            txt_writer.write('{}: \n'.format(getResidueID(res)))

    calc_time = time.time() - initial_time
    print('> Cdf and amino acid residue number list files generated in {}s'.
          format(int(calc_time)))
def getMoleculeFromAtom(atom: Chem.BasicAtom, protein: Chem.BasicMolecule) -> (Chem.BasicMolecule, list):
    """
    Collect the bonded component that contains the given atom.

    Starting at ``atom``, bonds are followed until no unvisited atom is left.
    Every visited atom and every bond of a visited atom is added to a
    fragment, which is finally copied into a new molecule.

    :param atom: Atom at which the traversal starts.
    :param protein: Parent molecule; used to look up the index of each visited atom.
    :return: The collected molecule and the list of indices (in ``protein``)
             of its atoms, in visiting order.
    """
    ligand = Chem.Fragment()
    pending = {atom}   # discovered but not yet processed
    visited = set()    # already moved into the fragment
    parentIndices = []

    while pending:
        current = pending.pop()
        visited.add(current)
        ligand.addAtom(current)
        parentIndices.append(protein.getAtomIndex(current))

        for bond in current.bonds:
            for other in bond.atoms:
                if other != current and other not in pending and other not in visited:
                    pending.add(other)  # newly discovered atom

            ligand.addBond(bond)  # ignored if already exists

    Chem.perceiveComponents(ligand, True)

    mol = Chem.BasicMolecule()
    mol.assign(ligand)
    return mol, parentIndices
def getSurfaceAtoms(mol):
    """Return a new molecule built from the surface atoms of ``mol``.

    A ``Chem.SurfaceAtomExtractor`` is run with ``mol`` passed as both of
    its molecule arguments; the extracted fragment is then copied into a
    fresh ``Chem.BasicMolecule``.
    """
    extracted = Chem.Fragment()
    Chem.SurfaceAtomExtractor().extract(mol, mol, extracted)

    result = Chem.BasicMolecule()
    result.assign(extracted)
    return result
def readPDBFromStream(stream: Base.IOStream):
    """Read one PDB record from ``stream`` and wrap it in a ``Protein``.

    The molecule is passed through ``sanitize_mol`` with
    ``makeHydrogenComplete=True`` before being wrapped. The reader's
    success flag is not checked, so a failed read still yields a
    ``Protein`` built from whatever the molecule contains.
    """
    # Imported locally, as in the original -- presumably to avoid an
    # import cycle between these modules (confirm before hoisting).
    from Protein import Protein
    from MoleculeTools import sanitize_mol

    structure = Chem.BasicMolecule()
    Biomol.PDBMoleculeReader(stream).read(structure)

    # The return value is discarded; sanitize_mol is assumed to work in place.
    sanitize_mol(structure, makeHydrogenComplete=True)
    return Protein(structure)
def process():
    """Copy an SD file while dropping molecules listed in an exclusion file.

    Command line: ``[input.sdf] [exclude-molecules.sdf] [output.sdf]``.
    Every molecule of the exclusion file is prepared with ``setupMolecule``
    and its hash code remembered. Input molecules whose hash code matches a
    remembered one are reported on stderr and skipped; all others are
    written to the output file. A summary is printed to stderr at the end.
    """
    if len(sys.argv) < 4:
        print('Usage:', sys.argv[0], '[input.sdf] [exclude-molecules.sdf] [output.sdf]', file=sys.stderr)
        sys.exit(2)

    # Streams stay bound to names for the whole run.
    ifs = Base.FileIOStream(sys.argv[1], 'r')
    xifs = Base.FileIOStream(sys.argv[2], 'r')
    ofs = Base.FileIOStream(sys.argv[3], 'w')

    reader = Chem.SDFMoleculeReader(ifs)
    xreader = Chem.SDFMoleculeReader(xifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfImportParameter(xreader, False)
    Chem.setMultiConfExportParameter(writer, False)

    stats = Stats()
    stats.read = 0
    stats.dropped = 0

    mol = Chem.BasicMolecule()

    # Pass 1: remember the hash code of every molecule to exclude.
    xhashes = set()
    while xreader.read(mol):
        setupMolecule(mol)
        xhashes.add(Chem.calcHashCode(mol))

    # Pass 2: filter the input against the remembered hash codes.
    while reader.read(mol):
        setupMolecule(mol)

        if Chem.calcHashCode(mol) in xhashes:
            stats.dropped += 1
            description = Chem.generateSMILES(mol) + ' ' + Chem.getName(mol)
            print('Dropped Molecule ' + str(stats.read) + ': ' + description, file=sys.stderr)
        else:
            writer.write(mol)

        stats.read += 1
        if stats.read % 10000 == 0:
            print('Processed ' + str(stats.read) + ' Molecules...', file=sys.stderr)

    print('', file=sys.stderr)
    print('-- Summary --', file=sys.stderr)
    print('Molecules processed: ' + str(stats.read), file=sys.stderr)
    print('Molecules dropped: ' + str(stats.dropped), file=sys.stderr)
def __init__(self, structure: Chem.BasicMolecule = None):
    """Create a protein, optionally initialised from an existing molecule.

    :param structure: Molecule whose content is assigned to this instance;
        skipped when the argument is falsy.
    """
    # Helper state is created before the base-class initialiser runs.
    self.shape: Shape.GaussianShape = Shape.GaussianShape()
    self.shapeFunc: Shape.GaussianShapeFunction = Shape.GaussianShapeFunction()
    self.coordinates: Math.Vector3DArray = Math.Vector3DArray()
    self.ligands: List[Chem.BasicMolecule] = []
    self.surfaceAtoms: Chem.BasicMolecule = Chem.BasicMolecule()

    super().__init__()

    if not structure:
        return

    # NOTE(review): sanitize_mol is imported but never called here.
    from MoleculeTools import sanitize_mol
    self.assign(structure)
def remove_components(mol: Chem.BasicMolecule) -> Chem.BasicMolecule:
    """Return a new molecule holding only the largest component of ``mol``.

    "Largest" means the highest ``numAtoms``; on a tie the component that
    comes first in ``Chem.getComponents(mol)`` wins. Structure data attached
    to ``mol`` is carried over to the result when present.
    """
    # default=None mirrors the empty-component-list case, in which None
    # is handed to assign().
    biggest = max(Chem.getComponents(mol),
                  key=lambda component: component.numAtoms,
                  default=None)

    reduced = Chem.BasicMolecule()
    reduced.assign(biggest)

    if Chem.hasStructureData(mol):
        Chem.setStructureData(reduced, Chem.getStructureData(mol))
    return reduced
def loadCDFMolecule(fname):
    """Read the first molecule of a CDF file and run basic perception on it.

    :param fname: Path of the CDF file.
    :return: The prepared molecule, or ``None`` when nothing could be read.
    """
    mol = Chem.BasicMolecule()
    if not Chem.FileCDFMoleculeReader(fname).read(mol):
        return None

    # Applied in this order; each step is called with its second argument
    # set to False.
    preparation_steps = (
        Chem.calcImplicitHydrogenCounts,
        Chem.perceiveHybridizationStates,
        Chem.setAtomSymbolsFromTypes,
        Chem.perceiveSSSR,
        Chem.setRingFlags,
        Chem.setAromaticityFlags,
    )
    for step in preparation_steps:
        step(mol, False)

    return mol
def getSurfaceExposedAtoms(self, copy=True) -> Chem.BasicMolecule:
    """
    Recompute the surface atoms of this protein and return them as a molecule.

    The result of ``getSurfaceAtoms(self)`` is stored in ``self.surfaceAtoms``
    on every call.

    :param copy: When true, return an independent copy; otherwise return the
                 ``self.surfaceAtoms`` object itself.
    :return: Molecule holding the surface atoms.
    """
    from ProteinTools import getSurfaceAtoms

    self.surfaceAtoms.assign(getSurfaceAtoms(self))
    if not copy:
        return self.surfaceAtoms

    duplicate = Chem.BasicMolecule()
    duplicate.assign(self.surfaceAtoms)
    return duplicate
def CDPLmolFromSdf(sdf_path, conformation):
    """Read a single CDPL molecule from an SD file.

    Only the first record of the file is read.

    Input:
        sdf_path (string): path to the sdf file
        conformation (boolean): when true, the molecule is passed through
            ``_CDPLgenerateConformation`` and that result is returned

    Return:
        (CDPL BasicMolecule): the molecule that was read (or the result of
        the conformation generator), or ``False`` when the file could not
        be read.
    """
    mol = Chem.BasicMolecule()
    ifs = Base.FileIOStream(sdf_path, 'r')
    sdf_reader = Chem.SDFMoleculeReader(ifs)

    if not sdf_reader.read(mol):
        # The path is passed as a lazy %-argument; without a placeholder in
        # the message it would never appear in the log output.
        # NOTE(review): assumes `log` follows the logging.Logger API.
        log.error("COULD NOT READ SDF %s", sdf_path)
        return False

    if conformation:
        return _CDPLgenerateConformation(mol)
    return mol
def CDPLmolFromSmiles(smiles_path, conformation):
    """Create a CDPL molecule from a SMILES file or a SMILES string.

    When ``smiles_path`` contains the substring ``".smi"`` it is treated as
    a file path and the first record of that file is read; otherwise the
    argument itself is parsed as a SMILES string.

    Input:
        smiles_path (string): path to a SMILES file, or a SMILES string
        conformation (boolean): when true, the molecule is passed through
            ``_CDPLgenerateConformation`` and that result is returned

    Return:
        (CDPL BasicMolecule): the molecule (or the result of the
        conformation generator), or ``False`` when the file could not be
        read.
    """
    if ".smi" in smiles_path:
        mol = Chem.BasicMolecule()
        smi_reader = Chem.FileSMILESMoleculeReader(smiles_path)
        if not smi_reader.read(mol):
            # The path is passed as a lazy %-argument; without a placeholder
            # in the message it would never appear in the log output.
            # NOTE(review): assumes `log` follows the logging.Logger API.
            log.error("COULD NOT READ Smiles %s", smiles_path)
            return False
    else:
        mol = Chem.parseSMILES(smiles_path)

    if conformation:
        return _CDPLgenerateConformation(mol)
    return mol
def _CDPLreadFromPDBFile(pdb_file):
    """Read the first molecule of a PDB file (private helper).

    Per the original note, this is used by the CDPLreadProteinFile method.

    Input:
        pdb_file (string): the path to the pdb file

    Return:
        (CDPL BasicMolecule): the molecule that was read, or ``False`` when
        the file could not be read.
    """
    ifs = Base.FileIOStream(pdb_file, 'r')
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    # TODO Should this be there for the pdb readin? or also in the config?
    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, False)

    if not pdb_reader.read(pdb_mol):
        # The path is passed as a lazy %-argument; without a placeholder in
        # the message it would never appear in the log output.
        # NOTE(review): assumes `log` follows the logging.Logger API.
        log.error("COULD NOT READ PDB %s", pdb_file)
        return False

    return pdb_mol
def process(sdf_file, psd_file_path): ifs = Base.FileIOStream(sdf_file, 'r') reader = Chem.SDFMoleculeReader(ifs) mol = Chem.BasicMolecule() Chem.setMultiConfImportParameter(reader, True) psd_creator = Pharm.PSDScreeningDBCreator( psd_file_path, Pharm.PSDScreeningDBCreator.CREATE, True) i = 0 t0 = time.clock() while reader.read(mol): setupMolecule(mol) psd_creator.process(mol) i += 1 if i % 100 == 0: print 'Processed ' + str(i) + ' molecules (' + str( time.clock() - t0), 's elapsed)...' t0 = time.clock() mol.clear() print '' print '-- Summary --' print 'Molecules processed: ' + str(psd_creator.numProcessed) print 'Molecules rejected: ' + str(psd_creator.numRejected) print 'Molecules deleted: ' + str(psd_creator.numDeleted) print 'Molecules inserted: ' + str(psd_creator.numInserted) psd_creator.close()
def generate_ph(pdb, key):
    """Build a ligand/environment interaction pharmacophore for a PDB file.

    Reads the PDB file, prepares the structure, extracts the residue whose
    code equals ``self.ligand_3_letter_code`` together with its environment
    (cutoff argument 7.0), analyses the interactions between both
    pharmacophores and adds exclusion volumes. The feature keys produced by
    ``generate_key`` are stored on the returned pharmacophore as ``fv`` and
    recorded in ``self.unique_feature_vector``; the input path is stored as
    ``path_to_pdb``.

    NOTE(review): ``self`` is not a parameter of this function -- presumably
    it is a closure nested inside a method; confirm. ``key`` is not used in
    the body. (Python 2 source.)

    :param pdb: Path of the PDB file.
    :param key: Unused here.
    :return: The interaction pharmacophore, or ``None`` when the file cannot
        be read or the ligand is not found.
    """
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = self.ligand_3_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    print '- Reading input: ', pdb, ' ...'

    if not pdb_reader.read(pdb_mol):
        print '!! Could not read input molecule'
        return

    print '- Processing macromolecule', pdb, ' ...'

    # Remove every bond that involves a metal atom. The index only advances
    # when nothing was removed -- presumably because removal shifts the
    # following bonds down by one.
    i = 0
    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)
        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # Structure preparation; the order of these calls is significant and
    # must not be changed.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)

    ligand = Chem.Fragment()

    print '- Extracting ligand ', tlc, ' ...'

    # Only the first atom carrying the ligand residue code is used as seed.
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print '!! Could not find ligand', tlc, 'in input file'
        return

    Chem.perceiveSSSR(ligand, True)

    lig_env = Chem.Fragment()
    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)

    print '- Constructing pharmacophore ...'

    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(False)
    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()
    analyzer.analyze(lig_pharm, env_pharm, interactions)

    # ------------------------- exclusion volumes (XVOLS)
    # Two feature sets are pulled from the mapping; the one obtained with
    # the True flag seeds the interaction pharmacophore.
    int_env_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_env_ftrs, interactions, False)
    int_core_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_core_ftrs, interactions, True)
    int_pharm = Pharm.BasicPharmacophore(int_core_ftrs)

    # Tolerance 1.0 for H-bond donor/acceptor features, 1.5 for all others.
    for ftr in int_env_ftrs:
        if Pharm.getType(ftr) == Pharm.FeatureType.H_BOND_DONOR or Pharm.getType(ftr) == Pharm.FeatureType.H_BOND_ACCEPTOR:
            Pharm.setTolerance(ftr, 1.0)
        else:
            Pharm.setTolerance(ftr, 1.5)

    Pharm.createExclusionVolumes(int_pharm, int_env_ftrs, 0.0, 0.1, False)

    int_env_ftr_atoms = Chem.Fragment()
    Pharm.getFeatureAtoms(int_env_ftrs, int_env_ftr_atoms)
    int_residue_atoms = Chem.Fragment()
    Biomol.extractResidueSubstructures(int_env_ftr_atoms, lig_env, int_residue_atoms, True)
    Chem.makeHydrogenDeplete(int_residue_atoms)

    # Predicate selecting atoms whose residue atom name is 'CA'.
    def isAlphaAtom(atom):
        return Biomol.getResidueAtomName(atom) == 'CA'

    # Keep only the 'CA' atoms and create exclusion volumes from them.
    Chem.removeAtomsIfNot(int_residue_atoms, isAlphaAtom)
    Pharm.createExclusionVolumes(int_pharm, int_residue_atoms, Chem.Atom3DCoordinatesFunctor(), 1.0, 2.0, False)

    # Collect the key of every feature that has a substructure and whose
    # type name is not 'XV'.
    features_in_ph = []
    for int_ftr in int_pharm:
        if Pharm.hasSubstructure(int_ftr) == False:
            continue
        elif ftype_names[Pharm.getType(int_ftr)] == 'XV':
            continue
        feature_id = generate_key(int_ftr)
        features_in_ph.append(str(feature_id))
        self.unique_feature_vector.add(str(feature_id))

    # Results are attached to the pharmacophore as ad-hoc attributes.
    int_pharm.fv = features_in_ph
    int_pharm.path_to_pdb = pdb
    return int_pharm
import CDPL.Chem as Chem
import CDPL.Math as Math


def process():
    """Set up XLogP regression training from an SD file and logP values.

    Command line: ``training-set.sdf logP-data regression-coeff-file``.
    For each molecule of the SD file one line is read from the logP data
    file and converted to float, and the molecule is prepared.

    NOTE(review): the body visible here stops after molecule preparation;
    ``mlr_model``, ``xlogp_calc``, ``histo`` and ``coeff_os`` are created
    but not used afterwards, so the remainder of the function is presumably
    missing from this view. ``sys`` and ``Base`` are also not imported in
    the visible lines.
    """
    if len(sys.argv) < 4:
        print('Usage:', sys.argv[0], 'training-set.sdf logP-data regression-coeff-file', file=sys.stderr)
        sys.exit(2)

    struct_is = Base.FileIOStream(sys.argv[1], 'r')
    exp_logp_is = Base.FileIOStream(sys.argv[2], 'r')
    coeff_os = Base.FileIOStream(sys.argv[3], 'w')

    mlr_model = Math.DMLRModel()
    sdf_reader = Chem.SDFMoleculeReader(struct_is)
    mol = Chem.BasicMolecule()

    xlogp_calc = Chem.XLogPCalculator()
    histo = Math.DVector()
    # Size the vector to the calculator's feature vector size.
    histo.resize(Chem.XLogPCalculator.FEATURE_VECTOR_SIZE)

    Chem.setMultiConfImportParameter(sdf_reader, False)

    while sdf_reader.read(mol):
        # One logP value per molecule, consumed in file order.
        exp_logp = float(exp_logp_is.readline())

        Chem.perceiveComponents(mol, False)
        Chem.perceiveSSSR(mol, False)
        Chem.setRingFlags(mol, False)
        Chem.calcImplicitHydrogenCounts(mol, False)
        Chem.perceiveHybridizationStates(mol, False)
def process():
    """Export every conformation of a CDF molecule as VTK line data.

    Command line: ``[input.cdf] [output directory]``. For each conformation
    a line set is written via ``pyevtk.hl.linesToVTK``; it consists of a
    spline through the backbone 'C' atoms followed by two half-bond segments
    per retained bond. A ``.pvd`` index file named after the input file is
    written alongside. (Python 2 source.)
    """
    if len(sys.argv) < 3:
        print >> sys.stderr, 'Usage:', sys.argv[0], '[input.cdf] [output directory]'
        sys.exit(2)

    # Input file name without directory and extension; used for output names.
    in_fname = path.splitext(path.basename(sys.argv[1]))[0]

    mol = Chem.BasicMolecule()
    cdf_reader = Chem.FileCDFMoleculeReader(sys.argv[1])

    # NOTE(review): pvd_file is never closed explicitly in this function.
    pvd_file = open(path.join(sys.argv[2], in_fname + '.pvd'), 'w')

    Util.writePVDHeader(pvd_file)

    print >> sys.stderr, '- Processing CDF-file:', sys.argv[1], '...'

    if not cdf_reader.read(mol):
        print '!! Could not read file'
        sys.exit(2)

    # Backbone atoms named 'C' serve as spline control points.
    backbone_atoms = []

    for atom in mol.atoms:
        if Biomol.isPDBBackboneAtom(atom) and Biomol.getResidueAtomName(atom) == 'C':
            backbone_atoms.append(atom)

    # Keep only bonds that touch neither a 'HOH' residue nor a hydrogen atom.
    bond_list = []

    for bond in mol.bonds:
        if Biomol.getResidueCode(bond.getAtom(0)) == 'HOH' or Biomol.getResidueCode(bond.getAtom(1)) == 'HOH':
            continue

        if Chem.getType(bond.getAtom(0)) == Chem.AtomType.H or Chem.getType(bond.getAtom(1)) == Chem.AtomType.H:
            continue

        bond_list.append(bond)

    num_confs = Chem.getNumConformations(mol)
    # Buffer size: 4 points per bond (two half-bond segments) plus 2 points
    # per spline segment.
    num_coords = len(bond_list) * 4 + (len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM - 1) * 2

    bond_ctr = Math.Vector3D()
    i = 0  # conformation (frame) index

    while i < num_confs:
        # numpy.ndarray allocates uninitialised storage; the first j entries
        # are filled below and the arrays are then resized to j.
        line_x_coords = numpy.ndarray(num_coords, numpy.float32)
        line_y_coords = numpy.ndarray(num_coords, numpy.float32)
        line_z_coords = numpy.ndarray(num_coords, numpy.float32)
        atom_types = numpy.ndarray(num_coords, numpy.uint32)

        spline_ctrl_points = numpy.ndarray((len(backbone_atoms), 3), numpy.float32)
        j = 0

        for atom in backbone_atoms:
            atom_pos = Chem.getConformer3DCoordinates(atom, i)

            spline_ctrl_points[j, 0] = atom_pos(0)
            spline_ctrl_points[j, 1] = atom_pos(1)
            spline_ctrl_points[j, 2] = atom_pos(2)
            j += 1

        spline_pts = spline(spline_ctrl_points, len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM)

        j = 0  # write position in the output buffers
        k = 0  # index of the current spline point

        # One segment (points k and k + 1) per step; spline points get type 0.
        while k < (len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM - 1):
            line_x_coords[j] = spline_pts[0][k]
            line_y_coords[j] = spline_pts[1][k]
            line_z_coords[j] = spline_pts[2][k]
            atom_types[j] = 0
            j += 1

            line_x_coords[j] = spline_pts[0][k + 1]
            line_y_coords[j] = spline_pts[1][k + 1]
            line_z_coords[j] = spline_pts[2][k + 1]
            atom_types[j] = 0
            j += 1
            k += 1

        # Each bond is split at its midpoint into two segments, each tagged
        # with the type of the atom it starts or ends at.
        for bond in bond_list:
            atom1 = bond.getAtom(0)
            atom2 = bond.getAtom(1)

            atom1_pos = Chem.getConformer3DCoordinates(atom1, i)
            atom2_pos = Chem.getConformer3DCoordinates(atom2, i)

            atom1_type = Chem.getType(atom1)
            atom2_type = Chem.getType(atom2)

            # Midpoint of the two atom positions.
            bond_ctr.assign(atom1_pos)
            bond_ctr += atom2_pos
            bond_ctr *= 0.5

            line_x_coords[j] = atom1_pos(0)
            line_y_coords[j] = atom1_pos(1)
            line_z_coords[j] = atom1_pos(2)
            atom_types[j] = atom1_type
            j += 1

            line_x_coords[j] = bond_ctr(0)
            line_y_coords[j] = bond_ctr(1)
            line_z_coords[j] = bond_ctr(2)
            atom_types[j] = atom1_type
            j += 1

            line_x_coords[j] = bond_ctr(0)
            line_y_coords[j] = bond_ctr(1)
            line_z_coords[j] = bond_ctr(2)
            atom_types[j] = atom2_type
            j += 1

            line_x_coords[j] = atom2_pos(0)
            line_y_coords[j] = atom2_pos(1)
            line_z_coords[j] = atom2_pos(2)
            atom_types[j] = atom2_type
            j += 1

        # Trim the buffers to the number of points actually written.
        line_x_coords.resize(j)
        line_y_coords.resize(j)
        line_z_coords.resize(j)
        atom_types.resize(j)

        out_fname = in_fname + '_frame_no_' + str(i)
        out_path = path.join(sys.argv[2], out_fname)
        line_data = {'atom_type': atom_types}

        print >> sys.stderr, '- Writing structure data for frame', i, '...'

        if not pyevtk.hl.linesToVTK(out_path, line_x_coords, line_y_coords, line_z_coords, pointData=line_data):
            print '!! Could not write output file'
            sys.exit(2)

        Util.writePVDEntry(pvd_file, i, out_fname, 'vtu')
        i += 1

    Util.writePVDFooter(pvd_file)
def generate_ph(pdb, args, df_constructor, ts): ifs = Base.FileIOStream(pdb, 'r') tlc = args.ligand_three_letter_code pdb_reader = Biomol.PDBMoleculeReader(ifs) pdb_mol = Chem.BasicMolecule() print '- Reading input: ', pdb, ' ...' if not pdb_reader.read(pdb_mol): print '!! Could not read input molecule' return print '- Processing macromolecule', pdb, ' ...' i = 0 while i < pdb_mol.getNumBonds(): bond = pdb_mol.getBond(i) if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]): pdb_mol.removeBond(i) else: i += 1 for a in pdb_mol.atoms: Chem.setImplicitHydrogenCount(a, 0) Chem.calcImplicitHydrogenCounts(pdb_mol, True) Chem.perceiveHybridizationStates(pdb_mol, True) Chem.makeHydrogenComplete(pdb_mol) Chem.setAtomSymbolsFromTypes(pdb_mol, False) Chem.calcImplicitHydrogenCounts(pdb_mol, True) Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False) Chem.setRingFlags(pdb_mol, True) Chem.setAromaticityFlags(pdb_mol, True) Chem.generateHydrogen3DCoordinates(pdb_mol, True) Chem.calcFormalCharges(pdb_mol, True) ligand = Chem.Fragment() print '- Extracting ligand ', tlc, ' ...' for atom in pdb_mol.atoms: if Biomol.getResidueCode(atom) == tlc: Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False) break if ligand.numAtoms == 0: print '!! Could not find ligand', tlc, 'in input file' return Chem.perceiveSSSR(ligand, True) lig_env = Chem.Fragment() Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0) Chem.perceiveSSSR(lig_env, True) print '- Constructing pharmacophore ...' 
lig_pharm = Pharm.BasicPharmacophore() env_pharm = Pharm.BasicPharmacophore() pharm_gen = Pharm.DefaultPharmacophoreGenerator(True) pharm_gen.generate(ligand, lig_pharm) pharm_gen.generate(lig_env, env_pharm) #Pharm.FilePMLFeatureContainerWriter('./test/lig_ph_' + str(ts) + '.pml').write(lig_pharm) analyzer = Pharm.DefaultInteractionAnalyzer() interactions = Pharm.FeatureMapping() analyzer.analyze(lig_pharm, env_pharm, interactions) df_constructor, interaction_at_ts = outputInteractions( lig_pharm, env_pharm, interactions, df_constructor) #Chem.FileSDFMolecularGraphWriter('./test/ligand_' + str(ts) + '.sdf').write(ligand) return df_constructor, interaction_at_ts
def cleanStructures():
    """Run ``processMolecule`` over an SD file and split the results.

    Command line:
    ``[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]``.
    Reading starts at record ``start_index``; when ``count`` is given and
    positive, at most that many records are handled. Molecules for which
    ``processMolecule`` returns a truthy result are written to the output
    file; the others are written to the dropped file and reported on
    stderr. A summary is printed to stderr at the end.
    """
    if len(sys.argv) < 5:
        print('Usage:', sys.argv[0], '[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]', file=sys.stderr)
        sys.exit(2)

    ifs = Base.FileIOStream(sys.argv[1], 'r')
    ofs = Base.FileIOStream(sys.argv[2], 'w')
    dofs = Base.FileIOStream(sys.argv[3], 'w')

    offset = int(sys.argv[4])
    # A count of 0 means "no limit".
    count = int(sys.argv[5]) if len(sys.argv) > 5 else 0

    reader = Chem.SDFMoleculeReader(ifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)
    dwriter = Chem.SDFMolecularGraphWriter(dofs)
    mol = Chem.BasicMolecule()

    stats = Stats()
    stats.read = 0
    stats.dropped = 0
    stats.modified = 0

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfExportParameter(writer, False)
    Chem.setMultiConfExportParameter(dwriter, False)

    if offset > 0:
        print('Skipping Molecules to Start Index ' + str(offset), file=sys.stderr)
        reader.setRecordIndex(offset)

    # stats.read holds the absolute record index; progress and summary
    # figures subtract the offset again.
    stats.read = offset

    while reader.read(mol):
        cleaned = processMolecule(mol, stats)

        if cleaned:
            writer.write(cleaned)
        else:
            stats.dropped += 1
            dwriter.write(mol)
            description = generateSMILES(mol) + ' ' + Chem.getName(mol)
            print('Dropped Molecule ' + str(stats.read) + ': ' + description, file=sys.stderr)

        stats.read += 1
        handled = stats.read - offset

        if stats.read % 10000 == 0:
            print('Processed ' + str(handled) + ' Molecules...', file=sys.stderr)

        if count > 0 and handled >= count:
            break

    print('', file=sys.stderr)
    print('-- Summary --', file=sys.stderr)
    print('Molecules processed: ' + str(stats.read - offset), file=sys.stderr)
    print('Molecules dropped: ' + str(stats.dropped), file=sys.stderr)
    print('Molecules modified: ' + str(stats.modified), file=sys.stderr)
def mol_from_smiles(smiles: str) -> Chem.BasicMolecule:
    """Parse a SMILES string and return the result of ``sanitize_mol``.

    The string is read through a ``Chem.SMILESMoleculeReader``; the reader's
    success flag is not checked.
    """
    source = Base.StringIOStream(smiles)
    smiles_reader = Chem.SMILESMoleculeReader(source)

    parsed = Chem.BasicMolecule()
    smiles_reader.read(parsed)
    return sanitize_mol(parsed)
def readPDBFromStream(stream: Base.IOStream):
    """Read one PDB record from ``stream`` and wrap it in a ``Protein``.

    The reader's success flag is not checked, so a failed read still yields
    a ``Protein`` built from whatever the molecule contains.
    """
    structure = Chem.BasicMolecule()
    Biomol.PDBMoleculeReader(stream).read(structure)
    return Protein(structure)
# remove ligand and water, just to be sure protein.separateLigandFromProtein() # calculate surface area for a single carbon atom protein.getGaussianShape() shapeFunc = protein.shapeFunc carbonProteinIndex = None for a in protein.atoms: if Chem.getType(a) == 6: carbonProteinIndex = protein.getAtomIndex(a) break surfAreaCarbonProtein = shapeFunc.calcSurfaceArea(carbonProteinIndex) # calculate the surface area contribution here? # create a simple carbon molecule with coordinates carbon = Chem.BasicMolecule() cAtom = carbon.addAtom() Chem.setType(cAtom, 6) coords = Math.Vector3D() coords.assign([1, 2, 3]) Chem.set3DCoordinates(cAtom, coords) # calculate shape and surface area of carbon molecule carbonShape, carbonShapeFunc = getGaussianShapeOfMolecule(carbon) surfAreaCarbon = carbonShapeFunc.surfaceArea # assert that contribution of carbon atom in protein is in fact the entire surface area of a single carbon assert surfAreaCarbon == surfAreaCarbonProtein # What am I missing here? # Summing the surface area of all atoms in the protein yields the surface area of the protein. I find it hard to
def process():
    """Build torsion angle histograms for a torsion rule library.

    Command line:
    ``[input torsion rules.xml] [structures.sdf] [output torsion histogram library.sdf]``.
    The torsion library is loaded, every eligible bond of every input
    structure is matched against its rules, the matches are handed to
    ``processMatch``, the collected data to ``processHistograms``, and the
    library is finally saved to the output path. Progress and errors go to
    stderr; load and save failures terminate the program with exit code 2.
    """
    if len(sys.argv) < 4:
        print(
            'Usage:',
            sys.argv[0],
            '[input torsion rules.xml] [structures.sdf] [output torsion histogram library.sdf]',
            file=sys.stderr)
        sys.exit(2)

    tor_lib = ConfGen.TorsionLibrary()

    try:
        tor_lib.load(Base.FileIOStream(sys.argv[1], 'r'))
    except Exception as e:
        # Report type and message; a bare except would also have trapped
        # SystemExit and KeyboardInterrupt.
        print('Error while loading input torsion rules:',
              '{}: {}'.format(type(e).__name__, e),
              file=sys.stderr)
        sys.exit(2)

    tor_matcher = ConfGen.TorsionRuleMatcher(tor_lib)
    tor_matcher.findAllRuleMappings(True)
    tor_matcher.findUniqueMappingsOnly(True)
    tor_matcher.stopAtFirstMatchingRule(True)

    mol = Chem.BasicMolecule()
    mol_reader = Chem.FileSDFMoleculeReader(sys.argv[2])
    Chem.setMultiConfImportParameter(mol_reader, False)

    print('- Analyzing input structures...', file=sys.stderr)

    i = 1  # 1-based number of the current input record
    rule_to_angle_hists = {}
    coords = Math.Vector3DArray()

    while True:
        try:
            if not mol_reader.read(mol):
                break
        except IOError as e:
            # A broken record is skipped but still counted.
            print('Error while reading input molecule', i, ':', e, file=sys.stderr)
            i += 1
            continue

        if i % 500 == 0:
            print('   ... At input molecule', i, '...', file=sys.stderr)

        Chem.initSubstructureSearchTarget(mol, False)

        try:
            Chem.get3DCoordinates(mol, coords)
        except Base.ItemNotFound:
            print('Could not get 3D-coordinates for molecule', i, file=sys.stderr)
            i += 1
            continue

        for bond in mol.bonds:
            # Skip ring bonds, bonds to hydrogen, and bonds where either
            # end has at most one explicit bond.
            if Chem.getRingFlag(bond):
                continue

            if Chem.isHydrogenBond(bond):
                continue

            if (Chem.getExplicitBondCount(bond.getBegin()) <= 1
                    or Chem.getExplicitBondCount(bond.getEnd()) <= 1):
                continue

            tor_matcher.findMatches(bond, mol, False)

            for match in tor_matcher:
                processMatch(i, match, mol, coords, rule_to_angle_hists)

        i += 1

    print('- Processing torsion angle histograms...', file=sys.stderr)

    processHistograms(tor_lib, rule_to_angle_hists)

    print('- Writing output torsion library...', file=sys.stderr)

    try:
        tor_lib.save(Base.FileIOStream(sys.argv[3], 'w+'))
    except Exception as e:
        print('Error while writing torsion library:',
              '{}: {}'.format(type(e).__name__, e),
              file=sys.stderr)
        sys.exit(2)

    print('DONE!', file=sys.stderr)
def process():
    """Convert an MD topology/trajectory pair into a multi-frame CDF file.

    Command line:
    ``[input topology-file] [input coordinates-file] [output CDF-file]``.
    Atoms and bonds are rebuilt from the MDAnalysis universe, missing O-H
    bonds of water residues are added, the structure is perceived, and the
    coordinates of every trajectory frame are appended to each atom's
    3D-coordinates array before the molecule is written. (Python 2 source.)
    """
    if len(sys.argv) < 4:
        print >> sys.stderr, 'Usage:', sys.argv[0], '[input topology-file] [input coordinates-file] [output CDF-file]'
        sys.exit(2)

    print >> sys.stderr, '- Processing topology-file', sys.argv[1], 'and coordinates-file', sys.argv[2], '...'

    u = MDAnalysis.Universe(sys.argv[1], sys.argv[2])
    cdf_mol = Chem.BasicMolecule()

    cdf_mol.reserveMemoryForAtoms(len(u.atoms))
    cdf_mol.reserveMemoryForBonds(len(u.bonds))

    print >> sys.stderr, '- Num. atoms:', len(u.atoms)
    print >> sys.stderr, '- Num. bonds:', len(u.bonds)

    num_frames = len(u.trajectory)

    print >> sys.stderr, '- Num. frames:', num_frames

    # construct atoms
    print >> sys.stderr, '- Building atoms ...'

    waters = {}  # residue id -> indices of the atoms of that water residue

    for atom_idx, md_atom in enumerate(u.atoms):
        atom = cdf_mol.addAtom()
        sym = MDAnalysis.topology.guessers.guess_atom_element(md_atom.name)

        Chem.setSymbol(atom, sym.title())
        Chem.setImplicitHydrogenCount(atom, 0)
        Biomol.setChainID(atom, md_atom.segid)

        # Residue name 'WAT' is stored as 'HOH'.
        if md_atom.resname == 'WAT':
            Biomol.setResidueCode(atom, 'HOH')
        else:
            Biomol.setResidueCode(atom, md_atom.resname)

        if Biomol.getResidueCode(atom) == 'HOH':
            waters.setdefault(md_atom.resid, []).append(atom_idx)

        Biomol.setResidueSequenceNumber(atom, int(md_atom.resid))
        Biomol.setResidueAtomName(atom, md_atom.name)

        # fix positive charge on arginine nitrogen
        if md_atom.resname == 'ARG' and md_atom.name == 'NH2':
            Chem.setFormalCharge(atom, 1)

        Chem.set3DCoordinates(atom, [float(coord) for coord in md_atom.position])

        coords_array = Math.Vector3DArray()
        coords_array.reserve(num_frames)

        Chem.set3DCoordinatesArray(atom, coords_array)
        Chem.setPEOECharge(atom, float(md_atom.charge))

    Chem.setAtomTypesFromSymbols(cdf_mol, True)

    # construct bonds
    print >> sys.stderr, '- Building bonds ...'

    for md_bond in u.bonds:
        cdf_mol.addBond(int(md_bond.atoms[0].index), int(md_bond.atoms[1].index))

    print >> sys.stderr, '- Building water atom bonds ...'

    for water in waters.values():
        if len(water) < 2:
            continue

        for oxygen_idx in water:
            oxygen = cdf_mol.atoms[oxygen_idx]

            if Chem.getType(oxygen) != Chem.AtomType.O:
                continue

            # Inspect the oxygen itself: the previous code tested a stale
            # variable left over from the atom construction loop. An oxygen
            # that already has more than one bond is left untouched.
            if oxygen.numBonds > 1:
                break

            for hydrogen_idx in water:
                if Chem.getType(cdf_mol.atoms[hydrogen_idx]) == Chem.AtomType.H:
                    cdf_mol.addBond(oxygen_idx, hydrogen_idx)

            # Only the first oxygen of a water residue is considered.
            break

    # make sane biomolecule
    Chem.perceiveSSSR(cdf_mol, True)
    Chem.setRingFlags(cdf_mol, True)
    Chem.perceiveBondOrders(cdf_mol, True)
    Chem.perceiveHybridizationStates(cdf_mol, True)
    Chem.setAromaticityFlags(cdf_mol, True)
    Chem.calcFormalCharges(cdf_mol, True)

    # read timesteps and write cdf
    print >> sys.stderr, '- Importing coordinates ...'

    # One scratch vector is reused for every atom and frame, as before.
    atom_coords = Math.Vector3D()

    for frame_idx, ts in enumerate(u.trajectory):
        print >> sys.stderr, '- Processing time step', frame_idx, '...'

        for md_atom in u.atoms:
            position = md_atom.position

            atom_coords[0] = float(position[0])
            atom_coords[1] = float(position[1])
            atom_coords[2] = float(position[2])

            coords_array = Chem.get3DCoordinatesArray(cdf_mol.getAtom(int(md_atom.index)))
            coords_array.addElement(atom_coords)

    print >> sys.stderr, '- Writing output file:'

    if not Chem.FileCDFMolecularGraphWriter(sys.argv[3]).write(cdf_mol):
        print >> sys.stderr, '!! Could not write output file'
        sys.exit(2)