def get_interacting_atoms(parsed, resi, resn):
    """Return atom-index pairs in contact between one residue and other chains.

    parsed: structure object queried through ``parsed.select(...)``.
    resi: residue label; ``resi[0]`` is the chain ID and ``resi[1:]`` the
        residue number, optionally followed by an insertion code.
    resn: one-letter code the selected residue must map to through
        ``constants.one_letter_code``.

    Each atom of the residue is searched against every atom outside its
    chain with ``pr.findNeighbors``: within 3.5 when the atom name starts
    with O or N, within 4.8 otherwise.  Pairs involving an atom whose
    element is 'H' are skipped.  A pair is kept when its distance is
    <= 3.5, or when both atom names start with 'C'.

    Returns a de-duplicated list of ``(residue_atom_index,
    neighbor_atom_index)`` tuples, in no particular order.
    Raises AssertionError when the selection spans several residue names
    or its residue name does not match ``resn``.
    """
    chain_id, res_label = resi[0], resi[1:]
    if res_label.isdigit():
        # removes all insertion codes
        query = 'chain %s and resnum %s_' % (chain_id, res_label)
    else:
        # has insertion code
        query = 'chain %s and resnum %s' % (chain_id, res_label.upper())
    target = parsed.select(query)

    resnames = target.getResnames()
    assert len(set(resnames)) == 1
    assert constants.one_letter_code[resnames[0]] == resn

    other_chain = parsed.select('not (chain {})'.format(chain_id))
    polar_initials = ('O', 'N')
    pairs = []
    for atom in target:
        cutoff = 3.5 if atom.getName()[0] in polar_initials else 4.8
        for near, far, dist in pr.findNeighbors(atom, cutoff, other_chain):
            if near.getElement() == 'H' or far.getElement() == 'H':
                continue
            # Beyond 3.5 only pairs whose names both start with 'C' count.
            if dist <= 3.5 or (near.getName()[0] == 'C'
                               and far.getName()[0] == 'C'):
                pairs.append((near.getIndex(), far.getIndex()))
    return list(set(pairs))
def get_interacting_atoms(parsed, resi, resn):
    """Return atom-index pairs within 3.5 between one residue and other chains.

    parsed: structure object queried through ``parsed.select(...)``.
    resi: residue label; ``resi[0]`` is the chain ID and ``resi[1:]`` is
        passed verbatim as the residue number in the selection.
    resn: one-letter code the selected residue must map to through
        ``constants.one_letter_code``.

    Returns a de-duplicated list of ``(residue_atom_index,
    neighbor_atom_index)`` tuples, in no particular order.
    Raises AssertionError when the selection spans several residue names
    or its residue name does not match ``resn``.
    """
    chain_id = resi[0]
    target = parsed.select('chain %s and resnum %s' % (chain_id, resi[1:]))

    resnames = target.getResnames()
    assert len(set(resnames)) == 1
    assert constants.one_letter_code[resnames[0]] == resn

    other_chain = parsed.select('not chain %s' % chain_id)
    cutoff = 3.5
    pairs = []
    for atom in target:
        pairs.extend(
            (near.getIndex(), far.getIndex())
            for near, far, dist in pr.findNeighbors(atom, cutoff, other_chain)
            if dist <= cutoff
        )
    return list(set(pairs))
def set_contacts(self, calpha_distance=10.5):
    """Set contacts between surface residues.

    A contact is by default defined as a pair of C alpha atoms with a
    distance less than 10.5 angstroms (``calpha_distance``).

    Neighbor pairs are computed with ``pr.findNeighbors`` on
    ``self.surf_sel_f`` and ``self.surf_sel_r`` and stored in
    ``self.nbrs_f`` / ``self.nbrs_r``.  A pair whose residue numbers
    differ by more than 6 is dropped when either CA-CB-CB angle (from
    ``pr.calcAngle``) is below 80; every other pair is stored as a
    ``(resnum_0, resnum_1)`` tuple in ``self.contacts_f`` /
    ``self.contacts_r``.
    """

    def _collect_contacts(nbrs, pdb):
        # Apply the sequence-separation / angle criterion to one neighbor list.
        contacts = []
        for nbr in nbrs:
            ca_0, ca_1 = nbr[0], nbr[1]
            resnum_0 = ca_0.getResnum()
            resnum_1 = ca_1.getResnum()
            if np.abs(resnum_0 - resnum_1) > 6:
                # NOTE(review): select() yields None for a residue without
                # a CB atom (e.g. glycine); confirm the surface selections
                # exclude such residues, otherwise calcAngle will fail here.
                cb_0 = pdb.select('name CB and resindex '
                                  + str(ca_0.getResindex()))
                cb_1 = pdb.select('name CB and resindex '
                                  + str(ca_1.getResindex()))
                ang1 = pr.calcAngle(ca_0, cb_0, cb_1)
                ang2 = pr.calcAngle(ca_1, cb_1, cb_0)
                if (ang1 < 80) or (ang2 < 80):
                    continue
            contacts.append((resnum_0, resnum_1))
        return contacts

    self.nbrs_f = pr.findNeighbors(self.surf_sel_f, calpha_distance)
    self.nbrs_r = pr.findNeighbors(self.surf_sel_r, calpha_distance)
    self.contacts_f = _collect_contacts(self.nbrs_f, self.pdb_f)
    self.contacts_r = _collect_contacts(self.nbrs_r, self.pdb_r)
def freqaai(ligand_vdm_pair, ifg_name, ifgatoms, lookup_dir):
    """Score a ligand/vdM pair by amino-acid frequency, for backbone and sidechain.

    ligand_vdm_pair: PDB input handed to ``pr.parsePDB``.
    ifg_name: iFG name used to build the lookup filename
        ``AAi_freq_combed_<ifg_name>.pkl``.
    ifgatoms: iterable of atom names; the iFG is selected as
        ``resnum 1 and name <names>``.
    lookup_dir: directory prefix, concatenated as-is with the filename
        (so it must end with a path separator); it is also passed to
        ``analysis.EnergyTerms.AAi_db_lookup``.

    The vdM is the non-hydrogen part of chain X, residue 10.  vdM atoms
    count as interacting when they lie within 3.5 of an iFG atom, or
    within 4.8 of a non-O/N iFG atom when both atom names start with 'C'.

    Returns ``(bbscore, scscore)``.  Each is
    ``-log10(lookup frequency / database frequency)`` for the vdM residue
    name, or None when no interacting atom of that kind (backbone names
    C, O, OXT, CA, N versus everything else) was found.
    """
    parsed = pr.parsePDB(ligand_vdm_pair)
    # automatically select vdm
    v = parsed.select('chain X and resnum 10 and not element H D')
    vdm_name = v.getResnames()[0]

    # find out if interaction is BB or SC
    bb = {'C', 'O', 'OXT', 'CA', 'N'}
    polar = ('O', 'N')
    ifg = parsed.select('resnum 1 and name %s' % (' '.join(ifgatoms)))
    vdmatoms = set()
    for atom in ifg:
        radius = 3.5 if atom.getName()[0] in polar else 4.8
        for ifgatom, vdmatom, dist in pr.findNeighbors(atom, radius, v):
            ifg_atom_name = ifgatom.getName()
            vdm_atom_name = vdmatom.getName()
            if dist <= 3.5 or (ifg_atom_name[0] == 'C'
                               and vdm_atom_name[0] == 'C'):
                vdmatoms.add(vdm_atom_name)
    bbinteraction = any(name in bb for name in vdmatoms)
    scinteraction = any(name not in bb for name in vdmatoms)

    # get lookup info for ifg
    pklfile = lookup_dir + 'AAi_freq_combed_%s.pkl' % ifg_name
    # NOTE: unpickling executes arbitrary code; lookup_dir must only contain
    # trusted files.
    with open(pklfile, 'rb') as infile:
        lookup = pkl.load(infile)
    # get database frequencies
    db_dict = analysis.EnergyTerms.AAi_db_lookup(lookup_dir)

    def _score(column):
        # .loc replaces the removed pandas .ix indexer; vdm_name is a label.
        return -np.log10(lookup.loc[vdm_name, column] / db_dict[vdm_name])

    bbscore = _score('vdm_freq_bb') if bbinteraction else None
    scscore = _score('vdm_freq_sc') if scinteraction else None
    return bbscore, scscore
def find_contact_atoms(self, parsed_pdb, comb, radius1=3.5, radius2=4.8):
    """Find the atoms contacting the iFG and store them on ``self`` by kind.

    parsed_pdb: object providing ``prody_pdb`` (queried with ``select``)
        and, for the probe path, ``segnames`` and ``pdb_acc_code``.
    comb: settings object.  When ``comb.probe_path`` is truthy, contacts
        are read from the external ``probe`` program run on
        ``comb.input_dir_pdb + pdb_acc_code + 'H.pdb'``; otherwise they
        are found by distance.  ``comb.ifg_count`` is incremented when
        protein contacts are found.
    radius1: distance-only mode: every non-H/D atom outside the iFG
        residue(s) at or within this distance is a contact.
    radius2: distance-only mode: search radius; atoms farther than
        ``radius1`` are kept only when their name starts with 'C'.

    Sets ``self.contact_atoms_all`` and, when it is not None, the
    water / ligand / metal / protein subsets (``contact_atoms_*``) with
    their ``contact_info_*`` descriptions, plus ``contact_resnums``,
    ``contact_chids``, ``contact_resindices`` and ``contact_segments``
    for protein contacts.  In probe mode it also fills
    ``self.contact_dict``, ``self.contact_pair_dict`` and
    ``self.probe_hbonds``; any exception there is printed with
    ``traceback.print_exc()`` and leaves ``contact_atoms_all`` as None.
    """

    def _neighbor_records(nbrs):
        # One record per pair: (second atom, its resindex, resnum, chid,
        # first atom name, second atom name, distance).
        return [(nbr[1], nbr[1].getResindex(), nbr[1].getResnum(),
                 nbr[1].getChid(), nbr[0].getName(), nbr[1].getName(), nbr[2])
                for nbr in nbrs]

    def _group_by_residue(sorted_records):
        # Map resindex -> [(first atom name, second atom name, distance)].
        # Records must already be sorted by resindex for groupby to work.
        return {key[0]: [rec[4:] for rec in group]
                for key, group in itertools.groupby(sorted_records,
                                                    key=lambda x: x[1:4])}

    def _contact_info(atoms, names_dist):
        # Render every contact of the residues in `atoms` as "(a b d.dd)".
        per_residue = [names_dist[ri]
                       for ri in np.unique(atoms.getResindices())]
        return ' '.join('(' + b[0] + ' ' + b[1] + ' '
                        + '{0:.2f}'.format(b[2]) + ')'
                        for c in per_residue for b in c)

    def _read_probe_contacts(selfsele, targsele, seg, contacts, hbonds):
        # Run probe for one target selection and record its contacts under
        # segment label `seg`.
        # NOTE(review): the command is a shell string built by
        # concatenation; paths and selections must come from trusted input.
        command = (comb.probe_path
                   + 'probe -quiet -MC -docho -condense -unformated -once '
                   + selfsele + ' ' + targsele + ' '
                   + comb.input_dir_pdb + parsed_pdb.pdb_acc_code + 'H.pdb')
        # Probe output abbr: wc: wide contact, cc: close contact,
        # so: small overlap, bo: bad overlap, hb: H-bonds
        with os.popen(command) as infile:
            for line in infile:
                spl = line.split(':')[1:]
                probe_type = spl[1]
                ifg_name = spl[2][10:15].strip()
                ifg_atomtype = spl[12]
                vdm_chid = spl[3][:2].strip()
                vdm_resnum = int(spl[3][2:6])
                vdm_resname = spl[3][6:10].strip()
                if vdm_resname == 'HOH':
                    vdm_name = 'O'
                else:
                    vdm_name = spl[3][10:15].strip()
                vdm_atomtype = spl[13]
                pr_score = spl[11]
                key = (vdm_resnum, vdm_chid, seg)
                contacts.append((ifg_name, ifg_atomtype, seg, vdm_chid,
                                 vdm_resnum, vdm_resname, vdm_name,
                                 vdm_atomtype, pr_score))
                self.contact_dict[key].add(ifg_name)
                self.contact_pair_dict[key].append(
                    (ifg_name, vdm_name, probe_type, pr_score))
                if probe_type == 'hb':
                    hbonds.append((ifg_name, ifg_atomtype, seg, vdm_chid,
                                   vdm_resnum, vdm_resname, vdm_name,
                                   vdm_atomtype))

    contact_names_dist = {}
    if comb.probe_path:
        try:
            resnum = np.unique(self.sele.getResnums())[0]
            chain = np.unique(self.sele.getChids())[0]
            resname = np.unique(self.sele.getResnames())[0]
            contacts, hbonds = [], []
            selfsele = ('"seg___A chain' + chain + ' ' + str(resnum) + ' '
                        + comb.probe_ifgatoms[resname] + '"')
            targsele = ('"seg___A not (' + str(resnum) + ' chain' + chain
                        + ')"')
            _read_probe_contacts(selfsele, targsele, 'A', contacts, hbonds)
            for seg in parsed_pdb.segnames[1:]:
                # Left-pad the segment name with underscores to 4 characters.
                s = '____'
                s = s[:-len(seg):] + seg
                _read_probe_contacts(selfsele, '"seg' + s + '"', seg,
                                     contacts, hbonds)
            contact_atom_seles = [
                parsed_pdb.prody_pdb.select('segment ' + c[2] + ' chain '
                                            + c[3] + ' resnum ' + str(c[4])
                                            + ' name ' + c[6])
                for c in contacts]
            self.probe_hbonds = hbonds
            if contact_atom_seles:
                self.contact_atoms_all = reduce(lambda a, b: a | b,
                                                contact_atom_seles)
                nbrs = pr.findNeighbors(self.sele, 4.8,
                                        self.contact_atoms_all)
                nbrs_info_full = sorted(_neighbor_records(nbrs),
                                        key=lambda x: x[1])
                contact_names_dist = _group_by_residue(nbrs_info_full)
            else:
                self.contact_atoms_all = None
        except Exception:
            # Best effort: report the failure and treat it as "no contacts".
            traceback.print_exc()
            self.contact_atoms_all = None
    else:
        nbrs = pr.findNeighbors(
            self.sele, radius2,
            parsed_pdb.prody_pdb.select(
                'not element H D and not resindex '
                + ' '.join(str(ri) for ri in self.resindex)))
        if nbrs:
            nbrs_info = _neighbor_records(nbrs)
            nbrs_info_3p5 = [c for c in nbrs_info if c[-1] <= radius1]
            nbrs_info_4p8 = [c for c in nbrs_info
                             if c[-1] > radius1 and c[-2][0] == 'C']
            nbrs_info_full = sorted(nbrs_info_3p5 + nbrs_info_4p8,
                                    key=lambda x: x[1])
            if nbrs_info_full:
                contact_names_dist = _group_by_residue(nbrs_info_full)
                self.contact_atoms_all = self.get_contact_atoms(
                    nbrs_info_full)

    # selects the subsets of atoms
    if self.contact_atoms_all is None:
        return

    self.contact_atoms_water = self.contact_atoms_all.select('water')
    if self.contact_atoms_water is not None:
        self.contact_info_water = _contact_info(self.contact_atoms_water,
                                                contact_names_dist)

    self.contact_atoms_ligand = self.contact_atoms_all.select(
        'hetero and not (water or element NA MG CA K '
        'FE ZN CO CU NI MN MO V)')
    if self.contact_atoms_ligand is not None:
        self.contact_info_ligand = _contact_info(self.contact_atoms_ligand,
                                                 contact_names_dist)

    self.contact_atoms_metal = self.contact_atoms_all.select(
        'element NA MG CA K FE ZN CO CU NI MN MO V')
    if self.contact_atoms_metal is not None:
        self.contact_info_metal = _contact_info(self.contact_atoms_metal,
                                                contact_names_dist)

    self.contact_atoms_protein = self.contact_atoms_all.select(
        'resname ' + resnames_aa_20_join)
    if self.contact_atoms_protein is not None:
        protein = self.contact_atoms_protein
        # Index of the first atom of each distinct residue.
        _resin, _ind = np.unique(protein.getResindices(), return_index=True)
        self.contact_resnums = protein.getResnums()[_ind].tolist()
        self.contact_chids = protein.getChids()[_ind].tolist()
        self.contact_resindices = protein.getResindices()[_ind].tolist()
        self.contact_segments = protein.getSegnames()[_ind].tolist()
        self.contact_info_protein = [contact_names_dist[ri]
                                     for ri in self.contact_resindices]
        comb.ifg_count += 1
def find_contact_atoms(self, parsed_pdb, comb, radius1=3.5, radius2=4.8):
    """Find the atoms contacting the iFG and store them on ``self`` by kind.

    parsed_pdb: object providing ``prody_pdb`` (queried with ``select``).
    comb: object whose ``ifg_count`` is incremented when protein contacts
        are found.
    radius1: every non-H/D atom outside the iFG residue(s) at or within
        this distance of the iFG selection is a contact.
    radius2: search radius; atoms farther than ``radius1`` are kept only
        when their name starts with 'C'.

    When contacts exist, sets ``self.contact_atoms_all`` (via
    ``self.get_contact_atoms``).  When that attribute is not None, also
    sets the water / ligand / metal / protein subsets (``contact_atoms_*``)
    with their ``contact_info_*`` descriptions, plus ``contact_resnums``,
    ``contact_chids`` and ``contact_resindices`` for protein contacts.
    The contact atoms never include atoms of the iFG residue itself.
    """

    def _contact_info(atoms, names_dist):
        # Render every contact of the residues in `atoms` as "(a b d.dd)".
        per_residue = [names_dist[ri]
                       for ri in np.unique(atoms.getResindices())]
        return ' '.join('(' + b[0] + ' ' + b[1] + ' '
                        + '{0:.2f}'.format(b[2]) + ')'
                        for c in per_residue for b in c)

    contact_names_dist = {}
    nbrs = pr.findNeighbors(
        self.sele, radius2,
        parsed_pdb.prody_pdb.select(
            'not element H D and not resindex '
            + ' '.join(str(ri) for ri in self.resindex)))
    if nbrs:
        # One record per pair: (second atom, its resindex, resnum, chid,
        # first atom name, second atom name, distance).
        nbrs_info = [(nbr[1], nbr[1].getResindex(), nbr[1].getResnum(),
                      nbr[1].getChid(), nbr[0].getName(), nbr[1].getName(),
                      nbr[2]) for nbr in nbrs]
        nbrs_info_3p5 = [c for c in nbrs_info if c[-1] <= radius1]
        nbrs_info_4p8 = [c for c in nbrs_info
                         if c[-1] > radius1 and c[-2][0] == 'C']
        # groupby below needs the records sorted by residue index.
        nbrs_info_full = sorted(nbrs_info_3p5 + nbrs_info_4p8,
                                key=lambda x: x[1])
        if nbrs_info_full:
            contact_names_dist = {
                key[0]: [rec[4:] for rec in group]
                for key, group in itertools.groupby(nbrs_info_full,
                                                    key=lambda x: x[1:4])}
            self.contact_atoms_all = self.get_contact_atoms(nbrs_info_full)

    # selects the subsets of atoms
    if self.contact_atoms_all is None:
        return

    self.contact_atoms_water = self.contact_atoms_all.select('water')
    if self.contact_atoms_water is not None:
        self.contact_info_water = _contact_info(self.contact_atoms_water,
                                                contact_names_dist)

    self.contact_atoms_ligand = self.contact_atoms_all.select(
        'hetero and not (water or element NA MG CA K '
        'FE ZN CO CU NI MN MO V)')
    if self.contact_atoms_ligand is not None:
        self.contact_info_ligand = _contact_info(self.contact_atoms_ligand,
                                                 contact_names_dist)

    self.contact_atoms_metal = self.contact_atoms_all.select(
        'element NA MG CA K FE ZN CO CU NI MN MO V')
    if self.contact_atoms_metal is not None:
        self.contact_info_metal = _contact_info(self.contact_atoms_metal,
                                                contact_names_dist)

    self.contact_atoms_protein = self.contact_atoms_all.select('protein')
    if self.contact_atoms_protein is not None:
        protein = self.contact_atoms_protein
        # Index of the first atom of each distinct residue.
        _resin, _ind = np.unique(protein.getResindices(), return_index=True)
        self.contact_resnums = protein.getResnums()[_ind].tolist()
        self.contact_chids = protein.getChids()[_ind].tolist()
        self.contact_resindices = protein.getResindices()[_ind].tolist()
        self.contact_info_protein = [contact_names_dist[ri]
                                     for ri in self.contact_resindices]
        comb.ifg_count += 1