def run(self):
    """Analyze the patched molecule with PLIP and store the result.

    The text held by the stream returned from ``self._patch_molecule()`` is
    loaded into a fresh ``PDBComplex`` (as a string, not a path), analyzed,
    and the resulting complex is stored on ``self.complex``.

    The stream is closed even when loading or analysis raises; in that case
    ``self.complex`` is left untouched and the exception propagates.
    """
    stream = self._patch_molecule()
    try:
        pdbcomplex = PDBComplex()
        pdbcomplex.load_pdb(stream.getvalue(), as_string=True)
        pdbcomplex.analyze()
        # The structure came from memory, so there is no real source file.
        # NOTE(review): presumably '/dev/null' is a placeholder for code that
        # later reads sourcefiles['filename'] - confirm against consumers.
        pdbcomplex.sourcefiles['filename'] = '/dev/null'
    finally:
        stream.close()
    self.complex = pdbcomplex
def plip_read_pdb_complex(path):
    """Load a PDB file, run PLIP on it and return its only interaction set.

    Args:
        path: Path of the PDB file handed to ``PDBComplex.load_pdb``.

    Returns:
        The single value stored in ``mol.interaction_sets``.

    Raises:
        AssertionError: If the analysis did not yield exactly one
            interaction set.
    """
    mol = PDBComplex()
    mol.load_pdb(path)
    mol.analyze()

    n_sets = len(mol.interaction_sets)
    if n_sets != 1:
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        raise AssertionError(
            'expected exactly one interaction set, found %d' % n_sets)

    # Dict views are not subscriptable on Python 3, so take the first value
    # through an iterator instead of ``values()[0]``.
    return next(iter(mol.interaction_sets.values()))
def plip_analysis(pdbpath, pdb_id):
    """Run a PLIP analysis on a PDB file and collect ligand contacts of one chain.

    The chain of interest is the part of ``pdb_id`` after the last ``'_'``.
    Every ligand found by PLIP is processed (no ligand whitelist is applied).
    The analyzed complex is also stored in the module-level ``full_data_mol``
    mapping under ``pdb_id``. Progress is printed to stdout.

    Args:
        pdbpath: Path of the PDB file handed to ``PDBComplex.load_pdb``.
        pdb_id: Identifier whose last ``'_'``-separated field is the chain id.

    Returns:
        dict mapping binding-site ids (``'hetid:chain:position'``) to the list
        of integer residue numbers on the selected chain that interact with
        that ligand. Ligands without contacts on the chain are omitted.
    """
    mol = PDBComplex()
    mol.load_pdb(pdbpath)
    chain_id = pdb_id.split('_')[-1]
    ligand_interactions_dict = {}

    ligand_hetids = [ligand.hetid for ligand in mol.ligands]
    print('=> ' + str(len(ligand_hetids)) + ' ligands to process')

    mol.analyze()
    full_data_mol[pdb_id] = mol

    # Visit every ligand encountered in the PDB file.
    for ligand in mol.ligands:
        bsid = ':'.join([ligand.hetid, ligand.chain, str(ligand.position)])
        print(bsid)
        interactions = mol.interaction_sets[bsid]  # all interaction data
        print(interactions.interacting_res)
        # Each entry is a residue number followed by a one-letter chain id;
        # keep only the residues that sit on the requested chain.
        interacting_residues = [
            int(res[:-1])
            for res in interactions.interacting_res
            if res[-1] == chain_id
        ]
        if interacting_residues:
            ligand_interactions_dict[bsid] = interacting_residues

    print('=> ' + str(len(ligand_interactions_dict)) +
          ' ligands interacting with chain ' + chain_id + ' found')
    print('')
    return ligand_interactions_dict
def plip_analysis_targets(pdbpath, pdb_id, pi_helix_pdb_ids):
    """Collect PLIP interactions that touch the given pi-helix residues.

    The chain of interest is the part of ``pdb_id`` after the last ``'_'``.
    A ligand is considered only when its het id is a key of the module-level
    ``target_pdb_plip_data[pdb_id]`` with a truthy value and the ligand lies
    on the chain of interest. The analyzed complex is also stored in the
    module-level ``full_data_mol`` mapping. Progress is printed to stdout.

    Args:
        pdbpath: Path of the PDB file handed to ``PDBComplex.load_pdb``.
        pdb_id: Identifier whose last ``'_'``-separated field is the chain id.
        pi_helix_pdb_ids: Iterable of regions, each an iterable of residue
            numbers (must be hashable).

    Returns:
        list of PLIP interaction objects whose ``resnr`` belongs to one of
        the regions in ``pi_helix_pdb_ids``.
    """
    mol = PDBComplex()
    mol.load_pdb(pdbpath)
    chain_id = pdb_id.split('_')[-1]
    ligand_interactions = []

    ligand_hetids = [ligand.hetid for ligand in mol.ligands]
    print('=> ' + str(len(ligand_hetids)) + ' ligands to process')

    mol.analyze()
    full_data_mol[pdb_id] = mol
    target_ligands = target_pdb_plip_data[pdb_id]
    print(target_ligands)

    # Flatten the regions once; the result does not depend on the ligand.
    pi_helix_residues = {res for region in pi_helix_pdb_ids for res in region}

    # Visit every ligand encountered in the PDB file.
    for ligand in mol.ligands:
        hetid, chain = ligand.hetid, ligand.chain
        bsid = ':'.join([hetid, chain, str(ligand.position)])
        print(bsid)

        if hetid not in target_ligands:
            print('BSID not in PLIP data ' + bsid)
            continue
        if not target_ligands[hetid] or chain != chain_id:
            continue
        print('passed filters: ' + bsid)

        interactions = mol.interaction_sets[bsid]  # all interaction data
        for inter in interactions.all_itypes:
            if inter.resnr in pi_helix_residues:
                # Class name of the interaction record, e.g. the kind of
                # contact PLIP detected.
                inter_type = type(inter).__name__
                print(' '.join(str(field) for field in (
                    inter_type, inter.resnr, inter.restype, inter.restype_l)))
                ligand_interactions.append(inter)

    return ligand_interactions
class Analyze:
    """Run PLIP on a ligand together with the protein's active site.

    On construction the active site around the ligand is extracted with
    ``GenActiveSite``, renumbered, combined with the ligand into a single
    molecule and analyzed by PLIP. The remaining methods translate PLIP's
    atom indices back to atom indices of the protein, ligand or active site
    by matching atom coordinates.
    """

    # Two atoms closer than this are treated as the same atom.
    _COORD_TOL = 1e-3

    def __init__(self, protein, ligand, within=7.5):
        """Build the active-site/ligand complex and analyze it with PLIP.

        Args:
            protein: Protein molecule (must provide what ``GenActiveSite``
                and ``getCoordinates`` need).
            ligand: Ligand molecule.
            within: Cut-off forwarded to ``GenActiveSite``.
        """
        self.protein = protein
        self.ligand = ligand
        genASite = GenActiveSite(within=within)
        self.asite = genASite(self.protein, self.ligand)
        renumberResidues(self.asite)
        log(f'nAtoms of asite {self.asite.GetNumAtoms()}')
        merged = Chem.CombineMols(self.asite, self.ligand)
        self.pmol = PDBComplex()
        # Rename the unnamed-ligand residue so PLIP sees it as 'MOL'.
        # NOTE(review): PDB blocks are column-aligned; confirm the amount of
        # whitespace in the search string matches what MolToPDBBlock emits.
        cont = Chem.MolToPDBBlock(merged).replace('UNL 1', 'MOL 1')
        self.pmol.load_pdb(cont, as_string=True)
        self.pmol.analyze()

    def _mapComplexAtoms(self, coords):
        """Map PLIP atom indices to row indices of ``coords`` by position.

        For every atom of the PLIP complex the nearest row of ``coords`` is
        looked up; the pair is recorded only when the distance is below
        ``_COORD_TOL``. Returns an empty dict when ``coords`` has no rows.
        """
        coords = np.asarray(coords)
        mapping = {}
        if len(coords) == 0:
            return mapping
        for key in self.pmol.atoms:
            atom = self.pmol.get_atom(key)
            # Broadcasting subtracts the atom position from every row.
            dist = np.linalg.norm(coords - np.asarray(atom.coords), axis=1)
            nearest = dist.argmin()
            if dist[nearest] < self._COORD_TOL:
                mapping[atom.idx] = nearest.item()
        return mapping

    def getPLIPComplexToProteinMap(self):
        """Return {PLIP atom idx: protein atom index} for coinciding atoms."""
        return self._mapComplexAtoms(getCoordinates(self.protein))

    def getPLIPComplexToLigandMap(self):
        """Return {PLIP atom idx: ligand atom index} for coinciding atoms."""
        return self._mapComplexAtoms(getCoordinates(self.ligand))

    def getPLIPComplexToASiteMap(self):
        """Return {PLIP atom idx: active-site atom index} for coinciding atoms."""
        return self._mapComplexAtoms(getCoordinates(self.asite))

    def interactions(self):
        """Yield every valid PLIP interaction in active-site/ligand indices.

        Yields:
            Tuples ``(bsite, name, aidxs, lidxs)`` where ``bsite`` is the key
            of the PLIP interaction set, ``name`` is ``Inter(...).name``,
            ``aidxs`` are the ``Inter.P()`` atoms translated to active-site
            indices and ``lidxs`` the ``Inter.L()`` atoms translated to
            ligand indices.

        Raises:
            KeyError: If an interaction refers to an atom that could not be
                matched by coordinates.
        """
        camap = self.getPLIPComplexToASiteMap()
        clmap = self.getPLIPComplexToLigandMap()
        for bsite, iacts in self.pmol.interaction_sets.items():
            for ia in iacts.all_itypes:
                iact = Inter(ia)
                if not iact.isValid():
                    continue
                aidxs = [camap[idx] for idx in iact.P()]
                lidxs = [clmap[idx] for idx in iact.L()]
                yield bsite, iact.name, aidxs, lidxs
def analyze_with_plip(pdb):
    """Analyze PDB text with PLIP and return the analyzed complex.

    Args:
        pdb: PDB content as a string (it is loaded with ``as_string=True``,
            not treated as a path).

    Returns:
        The ``PDBComplex`` after ``analyze()``, with
        ``sourcefiles['filename']`` set to ``'/dev/null'``.
    """
    plip_complex = PDBComplex()
    plip_complex.load_pdb(pdb, as_string=True)
    plip_complex.analyze()

    # The input was an in-memory string, so no real file name exists.
    plip_complex.sourcefiles['filename'] = '/dev/null'
    return plip_complex
def _collect_plip_interactions(pdb, pdb_filepath):
    """Run PLIP on one structure file and return its interaction rows.

    Returns a list with one ``convert_interaction_for_df`` row per detected
    interaction, or ``None`` when the file cannot be loaded, contains no
    ligands, or cannot be analyzed.
    """
    mol = PDBComplex()
    try:
        mol.load_pdb(pdb_filepath)
    except (Exception, SystemExit):
        # The original code used a bare ``except`` here.
        # NOTE(review): SystemExit is kept because some PLIP versions appear
        # to call sys.exit() on unreadable input - confirm for the version in
        # use. KeyboardInterrupt is deliberately no longer swallowed.
        print('=> ' + pdb + ': File not found')
        return None

    ligand_hetids = [ligand.hetid for ligand in mol.ligands]
    print('=> ' + pdb + ': ' + str(len(ligand_hetids)) +
          ' ligands to process')
    if not ligand_hetids:
        return None

    try:
        mol.analyze()
    except TypeError:
        print('=> ' + pdb + ': Problem with parsing')
        return None

    rows = []
    # Visit every ligand encountered in the PDB file.
    for ligand in mol.ligands:
        bsid = ':'.join([ligand.hetid, ligand.chain, str(ligand.position)])
        interactions = mol.interaction_sets[bsid]  # all interaction data
        for inter in interactions.all_itypes:
            inter_parsed = plip_parse.parse_inter(inter)
            rows.append(convert_interaction_for_df(pdb, inter_parsed))
    return rows


def run_plip(pdb,
             pdb_filepath,
             dump=True,
             dumpdir='',
             resdf=False,
             max_res_count=4000):
    """Run PLIP for one PDB entry and tabulate its ligand interactions.

    Args:
        pdb: PDB identifier; used in log messages, in the rows and as the
            cache file name (``<dumpdir>/<pdb>.p``).
        pdb_filepath: Path of the structure file. A path ending in ``.gz``
            is unpacked to a temporary file first, which is removed again
            before returning.
        dump: When true, reuse an existing cache file and write one after a
            successful run.
        dumpdir: Directory of the cache files, with or without a trailing
            slash. An empty string means the current directory.
        resdf: When true, return the DataFrame instead of ``True``.
        max_res_count: Currently unused; it belonged to a residue-count
            check that is disabled.

    Returns:
        ``False`` when the input is missing, has no ligands or cannot be
        analyzed. Otherwise the interaction DataFrame if ``resdf`` is true,
        else ``True``. The DataFrame has one row per interaction with the
        columns pdb, restype_l, reschain_l, resnr_l, inter_type, restype,
        reschain, resnr and inter_data.
    """
    picklename = os.path.join(dumpdir, pdb + '.p')

    if dump and os.path.isfile(picklename):
        print('=> ' + pdb + ': in cache')
        # NOTE: unpickling executes code; only point dumpdir at trusted files.
        return pd.read_pickle(picklename) if resdf else True

    # Unzip first if the structure file is gzipped.
    gz = pdb_filepath[-3:] == '.gz'
    if gz:
        try:
            pdb_filepath = get_unzipped_tempfile(pdb_filepath)
        except FileNotFoundError:
            print('=> ' + pdb + ': Gz file not found')
            return False

    try:
        ligand_interactions = _collect_plip_interactions(pdb, pdb_filepath)
    finally:
        if gz:
            # Remove the temporary file on failures as well as on success.
            os.remove(pdb_filepath)

    if ligand_interactions is None:
        return False

    df = pd.DataFrame(ligand_interactions,
                      columns=[
                          'pdb', 'restype_l', 'reschain_l', 'resnr_l',
                          'inter_type', 'restype', 'reschain', 'resnr',
                          'inter_data'
                      ])

    if dump:
        with open(picklename, 'wb') as handle:
            pickle.dump(df, handle)

    return df if resdf else True