def _CDPLgenerateConformation(cdpl_mol):
    '''
    PRIVATE METHOD
    configures a CDPL Molecule for conformation generation and generates one
    random conformation for it. \n
    Input: \n
    cdpl_mol (CDPL BasicMolecule): a CDPL BasicMolecule \n
    Return: \n
    (CDPL BasicMolecule): the input molecule with the generated 3D coordinates
    set, or None if the conformer generation did not succeed
    '''
    _CDPLconfigForConformation(cdpl_mol)  # TODO What exactly should be in the config for the cmp generation?

    cg = ConfGen.RandomConformerGenerator()
    cg.strictMMFF94AtomTyping = False

    ConfGen.prepareForConformerGeneration(cdpl_mol)

    # One coordinate entry per atom; the generator writes into this array.
    coords = Math.Vector3DArray()
    coords.resize(cdpl_mol.numAtoms, Math.Vector3D())

    cg.setup(cdpl_mol)
    if cg.generate(coords) != ConfGen.RandomConformerGenerator.SUCCESS:
        log.error('! Conformer generation failed !')
        return None

    Chem.set3DCoordinates(cdpl_mol, coords)
    return cdpl_mol
def getMoleculeFromAtom(atom: Chem.BasicAtom, protein: Chem.BasicMolecule) -> (Chem.BasicMolecule, list):
    """
    Collect the connected component that contains the given atom.

    Starting at ``atom``, the bonds are followed until no unvisited atom is
    left. Every visited atom and every traversed bond is put into a fragment,
    which is finally copied into a new molecule.

    :param atom: Start atom of the traversal.
    :param protein: Parent molecule; only used to look up the index of each visited atom.
    :return: Tuple of the new molecule and the list of indices (in ``protein``) of the visited atoms.
    """
    fragment = Chem.Fragment()
    pending = {atom}  # atoms discovered but not processed yet
    visited = set()  # atoms already processed
    parentIndices = []

    while pending:
        current = pending.pop()
        visited.add(current)
        fragment.addAtom(current)
        parentIndices.append(protein.getAtomIndex(current))

        # walk over all bonds of the current atom to reach its neighbors
        for bond in current.bonds:
            for other in bond.atoms:
                if other != current:
                    if not (other in pending or other in visited):  # new atom
                        pending.add(other)

                    fragment.addBond(bond)  # ignored if already exists

    Chem.perceiveComponents(fragment, True)

    mol = Chem.BasicMolecule()
    mol.assign(fragment)
    return mol, parentIndices
def getSurfaceAtoms(mol):
    """
    Run a Chem.SurfaceAtomExtractor on the molecule and return the result.

    :param mol: Molecule passed to the extractor both as first and second argument.
    :return: A new Chem.BasicMolecule assigned from the extracted fragment.
    """
    extracted = Chem.Fragment()
    Chem.SurfaceAtomExtractor().extract(mol, mol, extracted)

    result = Chem.BasicMolecule()
    result.assign(extracted)
    return result
def __iter__(self):
    """
    Iterate over the molecules provided by the reader ``self.r``.

    Yields ``sanitize_mol(mol)`` when ``self.properties`` is None, otherwise a
    tuple ``(sanitize_mol(mol), read_properties)``. If an IOError occurs for
    an entry, ``None`` is yielded for it instead. Iteration stops when the
    reader returns a falsy value or after ``self.nr_mols`` entries.
    """
    with_properties = self.properties is not None
    processed = 0

    while True:
        mol = Chem.BasicMolecule()
        try:
            if not self.r.read(mol):
                break
            if with_properties:
                # properties are extracted before the molecule gets sanitized
                read_properties = self._extract_properties_from_mol(mol)
                yield sanitize_mol(mol), read_properties
            else:
                yield sanitize_mol(mol)
        except IOError:
            yield None

        processed += 1
        if processed == self.nr_mols:
            break
def prepare(self, removeLigands=True):
    """
    Sanitize this object, generate hydrogen 3D coordinates and optionally remove ligands.

    :param removeLigands: If true, ``self.removeLigands()`` is called at the end.
    """
    from MoleculeTools import sanitize_mol

    sanitize_mol(self, makeHydrogenComplete=True)
    Chem.generateHydrogen3DCoordinates(self, True)

    if not removeLigands:
        return
    self.removeLigands()
def translate_mol_to_coords(mol, coords):
    """
    Subtract ``coords`` component-wise from every 3D position of the molecule.

    :param mol: Molecule whose 3D coordinates are read and overwritten.
    :param coords: Indexable offset; ``coords[j]`` is subtracted from component ``j`` of each position.
    :return: The same molecule with the shifted coordinates set.
    """
    positions = Math.Vector3DArray()
    Chem.get3DCoordinates(mol, positions)

    for position in positions:
        for axis, value in enumerate(position):
            position[axis] = value - coords[axis]  # shift to desired coordinates

    Chem.set3DCoordinates(mol, positions)
    return mol
def mol_to_sdf(molecules, path, multiconf=True):
    """
    Write one or several molecules to an SDF file.

    :param molecules: A single molecule or an iterable of molecules. A non-iterable argument is wrapped in a list.
    :param path: Path of the output file handed to Chem.FileSDFMolecularGraphWriter.
    :param multiconf: Value passed to Chem.setMultiConfExportParameter.
    """
    if not isinstance(molecules, Iterable):
        molecules = [molecules]

    w = Chem.FileSDFMolecularGraphWriter(path)
    try:
        Chem.setMultiConfExportParameter(w, multiconf)
        for mol in molecules:
            Chem.calcImplicitHydrogenCounts(mol, False)
            w.write(mol)
    finally:
        # close the writer even if writing one of the molecules raised
        w.close()
def makeRandomRotation(self, inplace: bool = True) -> Math.Vector3DArray:
    """
    Rotate the coordinates of this object by a random rotation.

    :param inplace: If true, the rotated coordinates are also set on this object.
    :return: The rotated coordinates as returned by ``rotate3DObject``.
    """
    # TODO: maybe add boundaries for randomness
    randomRotation = Rotation.random().as_matrix()

    rotMatrix = Math.Matrix3D()
    rotMatrix.assign(randomRotation)

    rotatedCoords = rotate3DObject(self.getCoordinates(), rotMatrix)
    if inplace:
        Chem.set3DCoordinates(self, rotatedCoords)
    return rotatedCoords
def prepareProtein(protein, removeLigands=True, removeWater=True):
    """
    Sanitize a protein, generate hydrogen 3D coordinates and optionally remove ligands.

    :param protein: Object to prepare; must provide ``removeLigands(removeWater=...)`` when ``removeLigands`` is true.
    :param removeLigands: If true, ``protein.removeLigands`` is called.
    :param removeWater: Forwarded to ``protein.removeLigands``.
    :return: The prepared protein (same object as passed in).
    """
    from MoleculeTools import sanitize_mol

    # Operate on the `protein` argument; this is a free function, there is no `self`.
    sanitize_mol(protein, makeHydrogenComplete=True)
    Chem.generateHydrogen3DCoordinates(protein, True)
    if removeLigands:
        protein.removeLigands(removeWater=removeWater)
    return protein
def calcAtomSetCentroid(atoms, conf_idx):
    """
    Return the arithmetic mean of the conformer coordinates of the given atoms.

    :param atoms: Sequence of atoms (must support ``len`` and indexing).
    :param conf_idx: Conformer index passed to Chem.getConformer3DCoordinates.
    :return: For a single atom the value returned by Chem.getConformer3DCoordinates, otherwise a new Math.Vector3D holding the mean position.
    """
    atom_count = len(atoms)

    # a single atom is its own centroid, no averaging required
    if atom_count == 1:
        return Chem.getConformer3DCoordinates(atoms[0], conf_idx)

    centroid = Math.Vector3D()
    for member in atoms:
        centroid += Chem.getConformer3DCoordinates(member, conf_idx)
    centroid /= atom_count

    return centroid
def __init__(self, lig_feature, env_feature):
    """
    Describe the interaction between a ligand feature and an environment feature.

    Sets the following attributes:
    ``interaction_type`` ('<ligand type>-<environment type>'),
    ``lig_residue`` / ``env_residue`` ('<residue code>_<sequence number>_<chain id>' of the first substructure atom) and
    ``lig_atom`` / ``env_atom`` (list of '<symbol>:<serial number>' strings sorted by serial number).

    :param lig_feature: Pharmacophore feature on the ligand side.
    :param env_feature: Pharmacophore feature on the environment side.
    """
    ftype_names = {
        Pharm.FeatureType.H_BOND_ACCEPTOR: 'HBA',
        Pharm.FeatureType.H_BOND_DONOR: 'HBD',
        Pharm.FeatureType.POS_IONIZABLE: 'PI',
        Pharm.FeatureType.NEG_IONIZABLE: 'NI',
        Pharm.FeatureType.AROMATIC: 'AR',
        Pharm.FeatureType.HYDROPHOBIC: 'H',
        Pharm.FeatureType.X_VOLUME: 'XV'
    }

    def residue_label(anchor):
        # residue code, sequence number and chain id of one atom
        return '{}_{}_{}'.format(Biomol.getResidueCode(anchor),
                                 Biomol.getResidueSequenceNumber(anchor),
                                 Biomol.getChainID(anchor))

    def atom_labels(substructure):
        # '<symbol>:<serial>' per atom, ordered by the numeric serial number
        labels = ['{}:{}'.format(Chem.getSymbol(member), Biomol.getSerialNumber(member))
                  for member in substructure.atoms]
        labels.sort(key=lambda k: int(k.split(':')[1]))
        return labels

    lig_type_name = ftype_names[Pharm.getType(lig_feature)]
    lig_substructure = Pharm.getSubstructure(lig_feature)
    lig_label = residue_label(lig_substructure.atoms[0])

    env_type_name = ftype_names[Pharm.getType(env_feature)]
    env_substructure = Pharm.getSubstructure(env_feature)
    env_label = residue_label(env_substructure.atoms[0])

    self.interaction_type = '{}-{}'.format(lig_type_name, env_type_name)
    self.lig_residue = lig_label
    self.env_residue = env_label
    self.lig_atom = atom_labels(lig_substructure)
    self.env_atom = atom_labels(env_substructure)
def calculateECFP(mol, nIter=4, nBits=1021):
    """
    Generate a circular fingerprint for the given molecule.

    Chem.makeHydrogenComplete is applied to ``mol`` itself before the
    fingerprint is generated.

    :param mol: Molecule to fingerprint.
    :param nIter: Number of iterations set on the Chem.CircularFingerprintGenerator.
    :param nBits: Number of bits set on the generator.
    :return: Util.BitSet filled by the generator.
    """
    Chem.makeHydrogenComplete(mol)

    generator = Chem.CircularFingerprintGenerator()
    generator.setNumIterations(nIter)
    generator.setNumBits(nBits)

    fingerprint = Util.BitSet()
    generator.generate(mol, fingerprint)
    return fingerprint
def center_mol(mol):
    """
    Shift the 3D coordinates of a molecule by the centroid computed with ``get_centroid``.

    :param mol: Molecule whose 3D coordinates are read and overwritten.
    :return: The same molecule with the shifted coordinates set.
    """
    coords = Math.Vector3DArray()
    Chem.get3DCoordinates(mol, coords)

    positions = np.array(coords)
    shifted = positions - get_centroid(positions)

    # copy the shifted values back into the CDPL coordinate container
    for atom_idx, vector in enumerate(coords):
        for axis, _ in enumerate(vector):
            vector[axis] = shifted[atom_idx, axis]

    # write the container back to the molecule
    Chem.set3DCoordinates(mol, coords)
    return mol
def process():
    """
    Copy molecules from an input SDF file to an output SDF file, skipping duplicates.

    The input and output paths are taken from ``sys.argv[1]`` and
    ``sys.argv[2]``. A molecule counts as duplicate when its
    Chem.calcHashCode value (computed after ``setupMolecule``) was seen
    before. Progress and a summary are printed to stderr. Exits with status 2
    if fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        print('Usage:', sys.argv[0], '[input.sdf] [output.sdf]', file=sys.stderr)
        sys.exit(2)

    in_stream = Base.FileIOStream(sys.argv[1], 'r')
    out_stream = Base.FileIOStream(sys.argv[2], 'w')

    reader = Chem.SDFMoleculeReader(in_stream)
    writer = Chem.SDFMolecularGraphWriter(out_stream)

    mol = Chem.BasicMolecule()

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfExportParameter(writer, False)

    stats = Stats()
    stats.read = 0
    stats.dropped = 0

    seen_hashes = set()

    while reader.read(mol):
        setupMolecule(mol)

        hashcode = Chem.calcHashCode(mol)

        if hashcode not in seen_hashes:
            seen_hashes.add(hashcode)
            writer.write(mol)
        else:
            stats.dropped += 1
            # stats.read is still the zero-based position of the current molecule here
            print('Removed Duplicate Molecule ' + str(stats.read) + ': ' + Chem.generateSMILES(mol) + ' ' + Chem.getName(mol), file=sys.stderr)

        stats.read += 1

        if stats.read % 10000 == 0:
            print('Processed ' + str(stats.read) + ' Molecules...', file=sys.stderr)

    print('', file=sys.stderr)
    print('-- Summary --', file=sys.stderr)
    print('Molecules processed: ' + str(stats.read), file=sys.stderr)
    print('Molecules dropped: ' + str(stats.dropped), file=sys.stderr)
def _CDPLextractProteinFragments(pdb_mol, lig_three_letter_code, radius=6.0):
    '''
    PRIVATE METHOD
    extracts the ligand with the given residue code and the residues
    surrounding it from a PDB molecule. \n
    Input: \n
    pdb_mol (CDPL BasicMolecule): the molecule read from the PDB file \n
    lig_three_letter_code (string): residue code identifying the ligand atoms \n
    radius (float): passed to Biomol.extractEnvironmentResidues, default is 6.0 \n
    Return: \n
    (CDPL Fragment): the environment of the ligand \n
    (CDPL Fragment): the ligand
    '''
    lig = Chem.Fragment()

    _CDPLcalcProteinProperties(pdb_mol)

    # extract ligand
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == lig_three_letter_code:
            Biomol.extractResidueSubstructure(atom, pdb_mol, lig, False)

    if lig.numAtoms == 0:
        # the placeholder is required, otherwise the extra argument breaks the message formatting
        log.error("The defined three letter code is not existing: %s", lig_three_letter_code)

    # extract environment
    env = Chem.Fragment()
    Biomol.extractEnvironmentResidues(lig, pdb_mol, env, float(radius))

    return env, lig
def encodePhaInfo2(surface, pha, invert=False):
    """
    One-hot encode, for every surface point, the column of the closest pharmacophore feature.

    Feature types are mapped to one of four columns through a lookup table
    (a second table with pairwise swapped columns is used when ``invert`` is
    true); types mapped to -1 are skipped. For every surface point the column
    with the smallest distance is set to 1 and all others to 0. If no feature
    contributed, column 0 is set. The number of iterated features is printed.

    :param surface: Sequence of points; each point must support subtraction with a numpy array.
    :param pha: Iterable of pharmacophore features.
    :param invert: Select the swapped type-to-column table.
    :return: numpy array of shape ``(len(surface), 4)`` containing zeros and ones.
    """
    typeCount = 4
    types = [-1, -1, -1, 0, 1, 2, 3, -1, -1, -1, -1, -1]
    invertedTypes = [-1, -1, -1, 1, 0, 3, 2, -1, -1, -1, -1, -1]
    columnOfType = invertedTypes if invert else types

    encoding = np.full((len(surface), typeCount), np.inf)

    count = 0
    for feature in pha:
        count += 1
        column = columnOfType[Pharm.getType(feature)]
        if column < 0:
            continue

        featureCoords = np.array(Chem.get3DCoordinates(feature))
        for row, pt in enumerate(surface):
            dist = np.linalg.norm(pt - featureCoords)
            encoding[row][column] = min(encoding[row][column], dist)
    print(count)

    # argmin returns the first minimum of each row, the same choice a strict '<' scan makes
    nearest = encoding.argmin(axis=1)
    encoding[:] = 0
    encoding[np.arange(len(encoding)), nearest] = 1
    return encoding
def _extract_properties_from_mol(self, mol):
    """
    Collect the structure data entries of a molecule whose header is listed in ``self.properties``.

    :param mol: Molecule whose structure data is read with Chem.getStructureData.
    :return: Dict mapping ``header[2:-1]`` (header without its first two and its last character) to the entry's data.
    """
    return {
        entry.header[2:-1]: entry.data
        for entry in Chem.getStructureData(mol)
        if entry.header in self.properties
    }
def is_inorganic(mol: Chem.BasicMolecule) -> bool:
    """
    Tell whether the molecule has no atom of type 6 (carbon).

    :param mol: Molecule whose atoms are inspected.
    :return: False as soon as an atom with type 6 is found, True otherwise.
    """
    return not any(Chem.getType(atom) == 6 for atom in mol.atoms)
def saveCDFMolecule(fname, mol):
    """
    Write a molecule to a CDF file.

    :param fname: Path of the output file handed to Chem.FileCDFMolecularGraphWriter.
    :param mol: Molecule to write.
    :return: ``mol`` if the writer reported success, otherwise None.
    """
    cdf_writer = Chem.FileCDFMolecularGraphWriter(fname)
    try:
        if not cdf_writer.write(mol):
            return None
        return mol
    finally:
        # release the file handle instead of relying on garbage collection
        # NOTE(review): assumes the CDF file writer offers close() like the SDF file writer - confirm
        cdf_writer.close()
def makeRandomTranslation(self, inplace: bool = True, scalingFactor: float = 10) -> Math.Vector3DArray:
    """
    Translate the coordinates of this object along a random direction.

    The direction is ``np.random.rand(3) * scalingFactor``.

    :param inplace: If true, the translated coordinates are also set on this object.
    :param scalingFactor: Scales the randomly retrieved direction by this factor
    :return: The translated coordinates as returned by ``translate3DObject``.
    """
    offset = np.random.rand(3) * scalingFactor

    direction = Math.Vector3D()
    direction.assign(offset)

    translatedCoords = translate3DObject(self.getCoordinates(), direction)
    if inplace:
        Chem.set3DCoordinates(self, translatedCoords)
    return translatedCoords
def generateSMILES(mol):
    """
    Return the SMILES string of a molecule, or an empty string.

    An empty string is returned when the module-level flag ``PRINT_SMILES``
    is falsy or when Chem.generateSMILES raises an exception.

    :param mol: Molecule to convert.
    :return: SMILES string or ''.
    """
    if not PRINT_SMILES:
        return ''

    try:
        return Chem.generateSMILES(mol)
    except Exception:
        # best effort only; unlike a bare except this lets KeyboardInterrupt/SystemExit through
        return ''
def _CDPLgeneratePha(mol, pha_type):
    '''
    PRIVATE METHOD
    generates the pharmacophore for the molecule and is used by the CDPLphaGenerator. \n
    Input: \n
    mol (CDPL BasicMolecule): the molecule the pharmacophore needs to be generated for \n
    pha_type (string): if equal to 'lig_only', no hydrogen coordinates are calculated \n
    Return: \n
    (CDPL BasicPharmacophore): the corresponding pharmacophore
    '''
    # compare by value; an identity check against a string literal is unreliable
    if pha_type != 'lig_only':  # TODO What exactly should be in the config for the pha generation?
        Chem.generateHydrogen3DCoordinates(mol, True)

    pharm = Pharm.BasicPharmacophore()
    pharm_generator = Pharm.DefaultPharmacophoreGenerator(True)
    pharm_generator.generate(mol, pharm)

    return pharm
def getAllHeavyAtoms(res):
    """
    Return all atoms of ``res`` whose type is not hydrogen.

    :param res: Object exposing an ``atoms`` iterable.
    :return: List of the non-hydrogen atoms in iteration order.
    """
    return [atom for atom in res.atoms if Chem.getType(atom) != Chem.AtomType.H]
def readPDBFromStream(stream: Base.IOStream):
    """
    Read one molecule in PDB format from a stream and wrap it in a ``Protein``.

    The return value of the reader is not checked. The molecule is sanitized
    with ``makeHydrogenComplete=True`` before it is wrapped.

    :param stream: Stream handed to Biomol.PDBMoleculeReader.
    :return: ``Protein`` built from the sanitized molecule.
    """
    from Protein import Protein
    from MoleculeTools import sanitize_mol

    pdb_reader = Biomol.PDBMoleculeReader(stream)
    structure = Chem.BasicMolecule()
    pdb_reader.read(structure)

    sanitize_mol(structure, makeHydrogenComplete=True)
    return Protein(structure)
def _generateNodes(self, pha):
    '''
    PRIVATE METHOD
    generates the nodes of the graph and appends them to self.nodes. Each node
    gets the feature type (via self._getAllowedSet), the 3D coordinates rounded
    to 6 decimals and a running index starting at 0. \n
    Input \n
    pha (CDPL BasicPharmacophore): pha the graph is based on
    '''
    for index, feature in enumerate(pha):
        node = PhaNode()
        node.feature_type = self._getAllowedSet(Pharm.getType(feature), ELEM_LIST)

        position = Chem.get3DCoordinates(feature)
        for axis in range(3):
            node.coords[axis] = round(position[axis], 6)

        node.index = index
        self.nodes.append(node)
def is_metal(mol: Chem.BasicMolecule) -> bool:
    """
    Indicate if the compound contains an atom whose type is not in ALLOWED_ATOMS.

    :param mol: Molecule whose atoms are inspected.
    :return: True as soon as an atom type outside ALLOWED_ATOMS is found, False otherwise.
    """
    return any(Chem.getType(atom) not in ALLOWED_ATOMS for atom in mol.atoms)
def CDPLmolFromSdf(sdf_path, conformation):
    '''
    generates a single CDPL Molecule from an sdf-file. If conformation is true, then one random conformation will be generated. \n
    Input: \n
    sdf_path (string): path to the sdf file \n
    conformation (boolean): if true, the molecule is passed to _CDPLgenerateConformation \n
    Return: \n
    (CDPL BasicMolecule): the corresponding CDPL BasicMolecule, False if the file could not be read,
    or the result of _CDPLgenerateConformation if conformation is true
    '''
    mol = Chem.BasicMolecule()
    ifs = Base.FileIOStream(sdf_path, 'r')
    sdf_reader = Chem.SDFMoleculeReader(ifs)

    if not sdf_reader.read(mol):
        # the placeholder is required, otherwise the extra argument breaks the message formatting
        log.error("COULD NOT READ SDF %s", sdf_path)
        return False

    if conformation:
        return _CDPLgenerateConformation(mol)

    return mol
def is_macrocyle(mol, ring_size=7):
    """
    Check whether the largest ring found by Chem.perceiveSSSR has more atoms than ``ring_size``.

    :param mol: Molecule to inspect.
    :param ring_size: Ring size threshold; a ring must have more atoms than this value.
    :return: True if such a ring exists, False otherwise (also when no ring was perceived).
    """
    sssr = Chem.perceiveSSSR(mol)

    # no rings at all -> cannot be a macrocycle
    if not sssr.getSize() > 0:
        return False

    largest_ring = max([ring.getNumAtoms() for ring in sssr])
    if largest_ring > ring_size:
        return True
    return False
def remove_metal_salts(mol: Chem.BasicMolecule) -> Chem.BasicMolecule:
    """
    Remove every atom whose type is contained in SALT_METALS from the molecule.

    :param mol: Molecule that is modified in place.
    :return: The same molecule after the removal.
    """
    salt_indices = [
        mol.getAtomIndex(atom)
        for atom in mol.atoms
        if Chem.getType(atom) in SALT_METALS
    ]

    # remove from the highest index downwards so the remaining indices stay valid
    for index in sorted(salt_indices, reverse=True):
        mol.removeAtom(index)

    return mol
def read_all(self):
    """
    Iterate over this object and collect all items in a dict.

    The key is the name returned by Chem.getName; if that name is empty, the
    position of the item in the iteration (as string) is used instead. Items
    sharing a key overwrite each other.

    :return: Dict mapping the key to the item.
    """
    mols = {}
    for position, mol in enumerate(self):
        name = Chem.getName(mol)
        # no name set -> fall back to the running position
        key = str(position) if len(name) == 0 else name
        mols[key] = mol
    return mols