def CDPLphaGenerator(protein, ligand, pha_type):
    '''
    Generates the pharmacophore of the ligand, of the environment, or of the
    interaction between the two.

    Input:
    protein (CDPL Fragment): the CDPL protein fragment (= environment)
    ligand (CDPL BasicMolecule): a molecule or a ligand in the corresponding protein pocket
    pha_type (string): "lig_only", "env_only" or anything else (e.g. None) -
        then it is the interaction pharmacophore

    Return:
    (CDPL BasicPharmacophore): the corresponding pharmacophore
    '''
    # Strings are compared by value: "is" tests object identity, which only
    # matches equal strings when the interpreter happens to intern them.
    if pha_type == 'lig_only':
        Chem.perceiveSSSR(ligand, True)
        return _CDPLgeneratePha(ligand, pha_type)

    # Both remaining modes need the ring perception on the protein.
    Chem.perceiveSSSR(protein, True)
    if pha_type == 'env_only':
        return _CDPLgeneratePha(protein, pha_type)

    # Interaction mode: generate both pharmacophores and keep only the
    # features that the interaction analyzer maps onto each other.
    Chem.perceiveSSSR(ligand, True)
    lig_pharm = _CDPLgeneratePha(ligand, pha_type)
    env_pharm = _CDPLgeneratePha(protein, pha_type)

    mapping = Pharm.FeatureMapping()
    Pharm.DefaultInteractionAnalyzer().analyze(lig_pharm, env_pharm, mapping)

    int_pharm = Pharm.BasicPharmacophore()
    Pharm.buildInteractionPharmacophore(int_pharm, mapping)
    return int_pharm
def read_in_ph(ph_path, output_dir_path):
    """Read one pharmacophore from a PML file and tag it with its paths.

    Args:
        ph_path: path of the PML file to read.
        output_dir_path: directory path stored on the result for later use.

    Returns:
        The pharmacophore that was read into, carrying two extra attributes:
        ``pml_path`` (= ph_path) and ``dir_path`` (= output_dir_path).
        The reader's success flag is not checked.
    """
    pml_reader = Pharm.PMLPharmacophoreReader(Base.FileIOStream(ph_path))
    pharmacophore = Pharm.BasicPharmacophore()
    pml_reader.read(pharmacophore)

    # Remember where the pharmacophore came from and where results belong.
    pharmacophore.pml_path = ph_path
    pharmacophore.dir_path = output_dir_path
    return pharmacophore
def __init__(self, lig_feature, env_feature):
    """Describe one ligand/environment feature pair.

    Sets the following attributes:
        interaction_type: '<ligand type>-<environment type>' using the short
            feature type labels (HBA, HBD, PI, NI, AR, H, XV).
        lig_residue / env_residue: '<residue code>_<sequence number>_<chain>'
            taken from the first atom of each feature's substructure.
        lig_atom / env_atom: '<symbol>:<serial number>' strings for every
            substructure atom, sorted by serial number.

    Args:
        lig_feature: pharmacophore feature on the ligand side.
        env_feature: pharmacophore feature on the environment side.
    """
    type_labels = {
        Pharm.FeatureType.H_BOND_ACCEPTOR: 'HBA',
        Pharm.FeatureType.H_BOND_DONOR: 'HBD',
        Pharm.FeatureType.POS_IONIZABLE: 'PI',
        Pharm.FeatureType.NEG_IONIZABLE: 'NI',
        Pharm.FeatureType.AROMATIC: 'AR',
        Pharm.FeatureType.HYDROPHOBIC: 'H',
        Pharm.FeatureType.X_VOLUME: 'XV',
    }

    def describe(feature):
        # Short type label plus the residue identifier of the first atom.
        label = type_labels[Pharm.getType(feature)]
        anchor = Pharm.getSubstructure(feature).atoms[0]
        residue = '{}_{}_{}'.format(Biomol.getResidueCode(anchor),
                                    Biomol.getResidueSequenceNumber(anchor),
                                    Biomol.getChainID(anchor))
        return label, residue

    def atom_keys(feature):
        # 'symbol:serial' for each substructure atom, ordered by serial.
        keys = ['{}:{}'.format(Chem.getSymbol(atom),
                               Biomol.getSerialNumber(atom))
                for atom in Pharm.getSubstructure(feature).atoms]
        keys.sort(key=lambda k: int(k.split(':')[1]))
        return keys

    lig_label, lig_residue = describe(lig_feature)
    env_label, env_residue = describe(env_feature)

    self.interaction_type = '{}-{}'.format(lig_label, env_label)
    self.lig_residue = lig_residue
    self.env_residue = env_residue
    self.lig_atom = atom_keys(lig_feature)
    self.env_atom = atom_keys(env_feature)
def CDPLphaFromPML(pml_path):
    '''
    Reads a single CDPL BasicPharmacophore from a pml-file.

    Input:
    pml_path (string): path to the pml file

    Return:
    (CDPL BasicPharmacophore): the corresponding CDPL BasicPharmacophore,
        or False when the reader reports that nothing could be read
    '''
    pha = Pharm.BasicPharmacophore()
    ifs = Base.FileIOStream(pml_path, 'r')
    pml_reader = Pharm.PMLPharmacophoreReader(ifs)

    if not pml_reader.read(pha):
        # The path needs a placeholder: with the stdlib logging convention
        # extra arguments are %-merged into the message, and a message without
        # a placeholder makes that merge fail instead of showing the path.
        # NOTE(review): assumes `log` follows the stdlib logging call
        # convention - confirm against the module's import of `log`.
        log.error("COULD NOT READ PML %s", pml_path)
        return False

    return pha
def encodePhaInfo2(surface, pha, invert=False):
    """One-hot encode, per surface point, the type of the nearest feature.

    Feature types 3, 4, 5 and 6 map to columns 0..3; with ``invert`` the
    columns are swapped pairwise (3<->4, 5<->6). All other types are ignored.
    The number of features iterated (ignored ones included) is printed.

    Args:
        surface: sequence of 3D points (one row of the result per point).
        pha: iterable of pharmacophore features.
        invert: use the pairwise-swapped column assignment.

    Returns:
        Array of shape (len(surface), 4). Each row holds a single 1 in the
        column of the closest accepted feature type; rows with no accepted
        feature at all get their 1 in column 0.
    """
    regular_columns = [-1, -1, -1, 0, 1, 2, 3, -1, -1, -1, -1, -1]
    swapped_columns = [-1, -1, -1, 1, 0, 3, 2, -1, -1, -1, -1, -1]
    n_columns = 4
    column_of = swapped_columns if invert else regular_columns

    # Start with "infinitely far away" so any real distance wins.
    encoding = np.full((len(surface), n_columns), np.inf)

    n_features = 0
    for feature in pha:
        n_features += 1
        column = column_of[Pharm.getType(feature)]
        if column < 0:
            continue
        center = np.array(Chem.get3DCoordinates(feature))
        for row, point in enumerate(surface):
            gap = np.linalg.norm(point - center)
            encoding[row][column] = min(encoding[row][column], gap)
    print(n_features)

    # Turn the distance table into a one-hot table; argmin picks the first
    # column on ties, and no distance cutoff is applied.
    nearest = encoding.argmin(axis=1)
    encoding[:] = 0
    encoding[np.arange(len(encoding)), nearest] = 1
    return encoding
def _CDPLgeneratePha(mol, pha_type):
    '''
    PRIVATE METHOD
    Generates the pharmacophore for the molecule and is used by the
    CDPLphaGenerator.

    Input:
    mol (CDPL BasicMolecule): the molecule the pharmacophore needs to be generated for
    pha_type (string): when equal to "lig_only" no hydrogen coordinates are
        calculated; for every other value they are generated first

    Return:
    (CDPL BasicPharmacophore): the corresponding pharmacophore
    '''
    # Compare by value; "is not" on a string literal tests identity and would
    # treat an equal but separately created string as a different mode.
    if pha_type != 'lig_only':
        # TODO What exactly should be in the config for the pha generation?
        Chem.generateHydrogen3DCoordinates(mol, True)

    pharm = Pharm.BasicPharmacophore()
    pharm_generator = Pharm.DefaultPharmacophoreGenerator(True)
    pharm_generator.generate(mol, pharm)
    return pharm
def getPh4InteractionDictionary(cdf_path, ligand_code):
    """Collect the pharmacophore interactions of a ligand for every conformation.

    The molecule is loaded with ``loadCDFMolecule``; the ligand is the residue
    of the first atom whose residue code equals ``ligand_code``.

    Args:
        cdf_path: path handed to ``loadCDFMolecule``.
        ligand_code: residue code identifying the ligand.

    Returns:
        dict mapping conformation index -> result of ``getPh4Interactions``
        for that conformation, or 0 when no ligand atom was found.
    """
    system = loadCDFMolecule(cdf_path)
    conformer_count = Chem.getNumConformations(system)

    ligand = Chem.Fragment()
    for atom in system.atoms:
        if Biomol.getResidueCode(atom) != ligand_code:
            continue
        # Only the first matching residue is extracted.
        Biomol.extractResidueSubstructure(atom, system, ligand, False)
        break

    if ligand.numAtoms == 0:
        print('> Could not find ligand {}'.format(ligand_code))
        return 0

    Chem.perceiveSSSR(ligand, True)

    # These objects are reused (and cleared) for every conformation.
    surroundings = Chem.Fragment()
    ligand_ph4 = Pharm.BasicPharmacophore()
    surroundings_ph4 = Pharm.BasicPharmacophore()
    generator = Pharm.DefaultPharmacophoreGenerator(True)
    interaction_analyzer = Pharm.DefaultInteractionAnalyzer()
    feature_pairs = Pharm.FeatureMapping()

    per_conformation = {}
    for conf_index in range(conformer_count):
        ligand_ph4.clear()
        surroundings_ph4.clear()
        feature_pairs.clear()
        surroundings.clear()

        # Make generator and environment extraction use this conformation.
        conformer_coords = Chem.AtomConformer3DCoordinatesFunctor(conf_index)
        generator.setAtom3DCoordinatesFunction(conformer_coords)

        # 7 is the distance argument of the environment extraction.
        Biomol.extractEnvironmentResidues(ligand, system, surroundings,
                                          conformer_coords, 7)
        Chem.perceiveSSSR(surroundings, True)

        generator.generate(ligand, ligand_ph4)
        generator.generate(surroundings, surroundings_ph4)
        interaction_analyzer.analyze(ligand_ph4, surroundings_ph4,
                                     feature_pairs)

        per_conformation[conf_index] = getPh4Interactions(ligand_ph4,
                                                          feature_pairs)

    return per_conformation
def savePharmacophore(pha, path):
    '''
    Saves a particular pharmacophore at the target path.

    Input:
    pha (CDPL BasicPharmacophore): the pharmacophore to be saved as a pml file
    path (String): path where to save the pml file (includes the filename.pml)

    Return:
    (bool): always True
    '''
    pml_writer = Pharm.PMLFeatureContainerWriter(Base.FileIOStream(path, 'w'))
    pml_writer.write(pha)
    return True
def generate_key(ftr):
    """Build the textual key that identifies a pharmacophore feature.

    Layout: ``<type>[<residue code>_<sequence number>_<chain>_<serial>...]``
    where residue data comes from the first substructure atom and the serial
    numbers of all substructure atoms that have one follow in descending
    natural-sort order.

    Args:
        ftr: pharmacophore feature with a substructure.

    Returns:
        The key string.
    """
    members = Pharm.getSubstructure(ftr).atoms
    anchor = members[0]
    type_label = str(ftype_names[Pharm.getType(ftr)])

    parts = [
        str(Biomol.getResidueCode(anchor)),
        str(Biomol.getResidueSequenceNumber(anchor)),
        str(Biomol.getChainID(anchor)),
    ]

    # Atoms without a serial number do not contribute to the key.
    serials = [str(Biomol.getSerialNumber(atom))
               for atom in members
               if Biomol.hasSerialNumber(atom)]
    serials.sort(key=natural_sort_key, reverse=True)

    return type_label + '[' + '_'.join(parts + serials) + ']'
def outputInteractions(lig_pharm, env_pharm, interactions, df_constructor):
    """Print and count the environment features each ligand feature touches.

    Ligand features without a substructure, exclusion volumes ('XV') and
    features without any mapped partner are skipped. Environment features
    without a substructure and environment exclusion volumes are skipped too.

    Args:
        lig_pharm: iterable of ligand pharmacophore features.
        env_pharm: environment pharmacophore (not used by this function).
        interactions: feature mapping; ``getValues(lig_ftr)`` yields the
            environment features mapped to a ligand feature.
        df_constructor: dict ``ligand key -> {environment key -> count}``
            accumulated across calls; it is updated in place.

    Returns:
        Tuple ``(df_constructor, interaction_at_ts)`` where
        ``interaction_at_ts`` maps each ligand key seen in this call to
        ``{environment key: 1}``.
    """
    interaction_at_ts = {}

    for lig_ftr in lig_pharm:
        if not Pharm.hasSubstructure(lig_ftr):
            continue
        if ftype_names[Pharm.getType(lig_ftr)] == 'XV':
            continue
        if len(interactions.getValues(lig_ftr)) < 1:
            continue

        ligand_key = generate_key(lig_ftr)
        print('Ligand feature : ' + str(ligand_key) + ' interacts with: ')
        env_ftrs = interactions.getValues(lig_ftr)

        # Running totals for this ligand feature (shared across calls).
        if ligand_key in df_constructor:
            dic_of_env_key = df_constructor[ligand_key]
        else:
            dic_of_env_key = {}
        # Presence flags for this call only.
        dic_of_env_key_at_ts = {}

        for env_ftr in env_ftrs:
            if not Pharm.hasSubstructure(env_ftr):
                continue
            # The exclusion-volume test has to look at the environment
            # feature; the ligand feature was already filtered above.
            if ftype_names[Pharm.getType(env_ftr)] == 'XV':
                continue

            env_key = generate_key(env_ftr)
            dic_of_env_key[env_key] = dic_of_env_key.get(env_key, 0) + 1
            dic_of_env_key_at_ts[env_key] = 1
            print(' - ' + str(env_key))

        df_constructor[ligand_key] = dic_of_env_key
        interaction_at_ts[ligand_key] = dic_of_env_key_at_ts

    return df_constructor, interaction_at_ts
def write_ph_for_rpms(self, rpm_maps, output_directory):
    """Write every pharmacophore of ``rpm_maps`` to its own PML file.

    For each outer key ``fv`` a directory ``<output_directory>/<fv>`` is
    created when missing, and each entry is written to
    ``<directory>/ph_<fv>_<ph_key>.pml``.

    Args:
        rpm_maps: mapping ``fv -> {ph_key -> pharmacophore}``.
            NOTE(review): the inner container is assumed to be a mapping
            keyed by ph_key - confirm against the code that builds it.
        output_directory: root directory for the per-``fv`` sub-directories.
    """
    for fv in rpm_maps:
        directory = output_directory + '/' + str(fv)
        if not os.path.exists(directory):
            os.makedirs(directory)

        pharmacophores = rpm_maps[fv]
        for ph_key in pharmacophores:
            ph_to_write = (directory + '/ph_' + str(fv) + '_' + str(ph_key)
                           + '.pml')
            print('- Writing pharmacophore: ' + str(ph_to_write))
            # ph_key is a key of the inner mapping, so the object to write
            # lives under rpm_maps[fv], not under rpm_maps itself.
            Pharm.PMLFeatureContainerWriter(
                Base.FileIOStream(ph_to_write, 'w')).write(
                    pharmacophores[ph_key])
def create_pha_spheres(pha, radius=0.5):
    """Create one colored sphere mesh per pharmacophore feature.

    Feature types 3, 4, 5 and 6 are painted cyan, green, magenta and yellow;
    every other type in the 12-entry table is white.

    Args:
        pha: iterable of pharmacophore features.
        radius: sphere radius passed to the mesh factory.

    Returns:
        List of sphere meshes, one per feature, each moved to the feature's
        3D coordinates.
    """
    white = [1, 1, 1]
    palette = [white] * 12
    palette[3] = [0, 1, 1]
    palette[4] = [0, 1, 0]
    palette[5] = [1, 0, 1]
    palette[6] = [1, 1, 0]

    spheres = []
    for feature in pha:
        type_index = Pharm.getType(feature)
        center = np.array(Chem.get3DCoordinates(feature))

        sphere = o3d.geometry.TriangleMesh.create_sphere(radius=radius)
        sphere.compute_vertex_normals()
        sphere.paint_uniform_color(np.array(palette[type_index]))
        sphere.translate(center)
        spheres.append(sphere)
    return spheres
def calculateStandardProperties(mol):
    """Compute a set of standard descriptors for one or several molecules.

    Args:
        mol: a single molecule or an iterable of molecules. A value that
            cannot be iterated is treated as a single molecule.

    Returns:
        dict of lists, one entry per molecule in input order, with the keys
        'nrAcceptors', 'nrDonors', 'nrRotBonds', 'molWeight', 'nrHeavyAtoms',
        'cLogP' and 'TPSA'. ('nrRings' is currently not reported.)
    """
    standardProperties = {
        'nrAcceptors': [],
        'nrDonors': [],
        'nrRotBonds': [],
        'molWeight': [],
        'nrHeavyAtoms': [],
        'cLogP': [],
        'TPSA': [],
    }

    # iter() signals "not iterable" with TypeError; catching only that keeps
    # KeyboardInterrupt and genuine errors from being swallowed.
    try:
        iter(mol)
    except TypeError:
        mol = [mol]

    for m in mol:
        Chem.calcTopologicalDistanceMatrix(m, True)

        # H-bond acceptor/donor counts come from the pharmacophore features.
        hba, hbd = 0, 0
        for f in getPharmacophore(m):
            ftype = Pharm.getType(f)
            if ftype == Pharm.FeatureType.H_BOND_ACCEPTOR:
                hba += 1
            elif ftype == Pharm.FeatureType.H_BOND_DONOR:
                hbd += 1

        standardProperties['nrAcceptors'].append(hba)
        standardProperties['nrDonors'].append(hbd)
        standardProperties['molWeight'].append(Chem.calcExplicitMass(m))
        standardProperties['nrHeavyAtoms'].append(Chem.getHeavyAtomCount(m))
        standardProperties['cLogP'].append(Chem.calcXLogP(m))
        standardProperties['TPSA'].append(Chem.calcTPSA(m))
        standardProperties['nrRotBonds'].append(
            Chem.getRotatableBondCount(m, False, False))

    return standardProperties
def encodePhaInfo(surface, pha, invert=False):
    """Encode the proximity of each surface point to four feature types.

    Feature types 3, 4, 5 and 6 map to columns 0..3; with ``invert`` the
    columns are swapped pairwise (3<->4, 5<->6). Other types are ignored.

    Args:
        surface: sequence of 3D points (one row of the result per point).
        pha: iterable of pharmacophore features.
        invert: use the pairwise-swapped column assignment.

    Returns:
        Array of shape (len(surface), 4); each cell holds the largest
        ``1 / (1 + distance)`` over the features of that column's type,
        or 0 when there is none.
    """
    accepted_types = (3, 4, 5, 6)
    swapped_types = (4, 3, 6, 5)
    column_order = swapped_types if invert else accepted_types

    encoding = np.zeros((len(surface), len(accepted_types)))
    for feature in pha:
        feature_type = Pharm.getType(feature)
        if feature_type not in accepted_types:
            continue

        center = np.array(Chem.get3DCoordinates(feature))
        column = column_order.index(feature_type)
        for row, point in enumerate(surface):
            closeness = 1 / (1 + np.linalg.norm(point - center))
            encoding[row][column] = max(encoding[row][column], closeness)
    return encoding
def _generateNodes(self, pha): ''' PRIVATE METHOD generates the nodes of the graph \n Input \n pha (CDPL BasicPharmacophore): pha the graph is based on ''' index_counter = 0 for feature in pha: node = PhaNode() node.feature_type = self._getAllowedSet(Pharm.getType(feature), ELEM_LIST) node.coords[0] = round(Chem.get3DCoordinates(feature)[0], 6) node.coords[1] = round(Chem.get3DCoordinates(feature)[1], 6) node.coords[2] = round(Chem.get3DCoordinates(feature)[2], 6) node.index = index_counter index_counter += 1 self.nodes.append(node)
def process(sdf_file, psd_file_path):
    """Fill a PSD screening database with the molecules of an SDF file.

    Every molecule read is prepared with ``setupMolecule`` and handed to the
    database creator. Progress is printed every 100 molecules and a summary
    of the creator's counters is printed at the end.

    Args:
        sdf_file: path of the SDF input file (multi-conformer import enabled).
        psd_file_path: path of the screening database to create.
    """
    # time.clock() no longer exists on Python >= 3.8; use perf_counter where
    # it is available and fall back to clock on interpreters without it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    ifs = Base.FileIOStream(sdf_file, 'r')
    reader = Chem.SDFMoleculeReader(ifs)
    mol = Chem.BasicMolecule()
    Chem.setMultiConfImportParameter(reader, True)

    psd_creator = Pharm.PSDScreeningDBCreator(
        psd_file_path, Pharm.PSDScreeningDBCreator.CREATE, True)

    # Close the database even when reading or processing raises.
    try:
        i = 0
        t0 = timer()
        while reader.read(mol):
            setupMolecule(mol)
            psd_creator.process(mol)
            i += 1
            if i % 100 == 0:
                # Time is measured per batch of 100 molecules.
                print('Processed ' + str(i) + ' molecules (' +
                      str(timer() - t0) + ' s elapsed)...')
                t0 = timer()
            mol.clear()

        print('')
        print('-- Summary --')
        print('Molecules processed: ' + str(psd_creator.numProcessed))
        print('Molecules rejected: ' + str(psd_creator.numRejected))
        print('Molecules deleted: ' + str(psd_creator.numDeleted))
        print('Molecules inserted: ' + str(psd_creator.numInserted))
    finally:
        psd_creator.close()
for e in shape: e.setRadius(e.getRadius() * scaleFactor) shapeFunc.setMaxOrder(6) shapeFunc.setShape(shape) return shape, shapeFunc def getShapeWithIncreasedRadius(mol, increase=0.5): shape = Shape.GaussianShape() shapeFunc = Shape.GaussianShapeFunction() Shape.generateGaussianShape(mol, shape, inc_h=True) for e in shape: e.setRadius(e.getRadius() + increase) shapeFunc.setMaxOrder(6) shapeFunc.setShape(shape) return shape, shapeFunc path = '../Data/benchmark5.5/structures/' p = Protein() p.fromFile('{}1A2K_l_b.pdb'.format(path)) # remove ligands and other crystalization artifacts p.removeLigands() sanitized = sanitize_mol(p, makeHydrogenComplete=True) Pharm.prepareForPharmacophoreGeneration(p) Chem.generateHydrogen3DCoordinates(p, True) for i in range(10): scale = 1 + i / 10 shape, shapeFunc = getShape(p, scaleFactor=scale) print(scale, shapeFunc.surfaceArea)
def generate_ph(pdb, key):
    """Build the ligand/environment interaction pharmacophore of a PDB file.

    Steps: read the PDB molecule, drop bonds to metal atoms, run the
    hydrogen/ring/aromaticity preparation, extract the ligand residue and the
    residues around it, generate both pharmacophores, analyze their
    interactions, and add exclusion volumes for the interacting environment
    features and for the alpha carbons of the interacting residues.

    Status messages are emitted through single-argument ``print(...)`` calls,
    which produce the same text as the former print statements and also parse
    under Python 3.

    NOTE(review): ``self`` is not a parameter. The function reads
    ``self.ligand_3_letter_code`` and adds to ``self.unique_feature_vector``,
    so it presumably is nested inside a method and closes over that
    ``self`` - confirm.

    Args:
        pdb: path of the PDB file to read.
        key: not used by this function.

    Returns:
        The interaction pharmacophore with two extra attributes: ``fv`` (list
        of feature keys, exclusion volumes and features without substructure
        left out) and ``path_to_pdb``. None when the file cannot be read or
        the ligand residue is not found.
    """
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = self.ligand_3_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    print('- Reading input:  %s  ...' % (pdb,))
    if not pdb_reader.read(pdb_mol):
        print('!! Could not read input molecule')
        return

    print('- Processing macromolecule %s  ...' % (pdb,))

    # Remove every bond that involves a metal atom. The index only advances
    # when nothing was removed because removal shifts the following bonds.
    i = 0
    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)
        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # Preparation pipeline; the call order is kept exactly as it was.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)

    ligand = Chem.Fragment()
    print('- Extracting ligand  %s  ...' % (tlc,))
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            # Only the first residue with the ligand code is used.
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print('!! Could not find ligand %s in input file' % (tlc,))
        return

    Chem.perceiveSSSR(ligand, True)

    lig_env = Chem.Fragment()
    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)

    print('- Constructing pharmacophore ...')
    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(False)
    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()
    analyzer.analyze(lig_pharm, env_pharm, interactions)

    # ------------------------- exclusion volumes
    int_env_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_env_ftrs, interactions, False)
    int_core_ftrs = Pharm.FeatureSet()
    Pharm.getFeatures(int_core_ftrs, interactions, True)
    int_pharm = Pharm.BasicPharmacophore(int_core_ftrs)

    # H-bond partners get a tolerance of 1.0, all other features 1.5.
    h_bond_types = (Pharm.FeatureType.H_BOND_DONOR,
                    Pharm.FeatureType.H_BOND_ACCEPTOR)
    for ftr in int_env_ftrs:
        if Pharm.getType(ftr) in h_bond_types:
            Pharm.setTolerance(ftr, 1.0)
        else:
            Pharm.setTolerance(ftr, 1.5)

    Pharm.createExclusionVolumes(int_pharm, int_env_ftrs, 0.0, 0.1, False)

    # Second set of exclusion volumes: alpha carbons of the residues that
    # own the interacting environment features.
    int_env_ftr_atoms = Chem.Fragment()
    Pharm.getFeatureAtoms(int_env_ftrs, int_env_ftr_atoms)
    int_residue_atoms = Chem.Fragment()
    Biomol.extractResidueSubstructures(int_env_ftr_atoms, lig_env,
                                       int_residue_atoms, True)
    Chem.makeHydrogenDeplete(int_residue_atoms)

    def isAlphaAtom(atom):
        return Biomol.getResidueAtomName(atom) == 'CA'

    Chem.removeAtomsIfNot(int_residue_atoms, isAlphaAtom)
    Pharm.createExclusionVolumes(int_pharm, int_residue_atoms,
                                 Chem.Atom3DCoordinatesFunctor(), 1.0, 2.0,
                                 False)

    # Record the key of every real (non exclusion volume) feature.
    features_in_ph = []
    for int_ftr in int_pharm:
        if not Pharm.hasSubstructure(int_ftr):
            continue
        if ftype_names[Pharm.getType(int_ftr)] == 'XV':
            continue
        feature_id = generate_key(int_ftr)
        features_in_ph.append(str(feature_id))
        self.unique_feature_vector.add(str(feature_id))

    int_pharm.fv = features_in_ph
    int_pharm.path_to_pdb = pdb
    return int_pharm
def generate_ph(pdb, args, df_constructor, ts):
    """Analyze the ligand/environment interactions of one PDB file.

    Steps: read the PDB molecule, drop bonds to metal atoms, reset and
    recompute hydrogens, ring/aromaticity flags and formal charges, extract
    the ligand residue and the residues around it, generate both
    pharmacophores, analyze their interactions and pass the result to
    ``outputInteractions``.

    Status messages are emitted through single-argument ``print(...)`` calls,
    which produce the same text as the former print statements and also parse
    under Python 3.

    Args:
        pdb: path of the PDB file to read.
        args: object providing ``ligand_three_letter_code``.
        df_constructor: accumulated interaction counts, forwarded to and
            updated by ``outputInteractions``.
        ts: not used by this function.

    Returns:
        Tuple ``(df_constructor, interaction_at_ts)`` as returned by
        ``outputInteractions``; None when the file cannot be read or the
        ligand residue is not found.
    """
    ifs = Base.FileIOStream(pdb, 'r')
    tlc = args.ligand_three_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    print('- Reading input:  %s  ...' % (pdb,))
    if not pdb_reader.read(pdb_mol):
        print('!! Could not read input molecule')
        return

    print('- Processing macromolecule %s  ...' % (pdb,))

    # Remove every bond that involves a metal atom. The index only advances
    # when nothing was removed because removal shifts the following bonds.
    i = 0
    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)
        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # Start from zero implicit hydrogens before the counts are recalculated.
    for a in pdb_mol.atoms:
        Chem.setImplicitHydrogenCount(a, 0)

    # Preparation pipeline; the call order is kept exactly as it was.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    Chem.calcFormalCharges(pdb_mol, True)

    ligand = Chem.Fragment()
    print('- Extracting ligand  %s  ...' % (tlc,))
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            # Only the first residue with the ligand code is used.
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print('!! Could not find ligand %s in input file' % (tlc,))
        return

    Chem.perceiveSSSR(ligand, True)

    lig_env = Chem.Fragment()
    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)

    print('- Constructing pharmacophore ...')
    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(True)
    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)

    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()
    analyzer.analyze(lig_pharm, env_pharm, interactions)

    df_constructor, interaction_at_ts = outputInteractions(
        lig_pharm, env_pharm, interactions, df_constructor)

    return df_constructor, interaction_at_ts