def _processPDB(self):
    """Collect the atoms of all peptide and phosphate chains into a dataframe.

    Walks over every chain of the first model (``self._pdb[0]``). A chain is
    tagged as a *peptide chain* when ``CaPPBuilder`` can build at least one
    polypeptide from it, and as a *phosphate chain* from the first
    PTR/TPO/SEP residue onwards. One row is stored per atom of every
    non-water residue that belongs to such a chain. Phospho-residues without
    an atom named ``P`` are dropped again.

    Side effects: sets ``self._df`` (one row per atom),
    ``self._peptideChains`` (chain id -> sequence of the first polypeptide)
    and ``self._phosphateChains`` (set of chain ids).

    Raises:
        Exception: if no atom at all was collected.
    """
    self._logger.info("Processing PDB")
    ppb = CaPPBuilder()
    phospho_resnames = ('PTR', 'TPO', 'SEP')

    d = []
    peptide_chains = {}
    phosphate_chains = set()

    # Loop over all chains
    for chain_idx, chain in enumerate(self._pdb[0]):
        chain_id = chain.get_id()
        isPeptideChain = False
        isPhosphateChain = False

        # try to create peptide sequence
        pp = ppb.build_peptides(chain)
        if pp:
            # tag chain as peptide chain
            isPeptideChain = True
            # str() replaces Seq.tostring(), which newer Biopython dropped
            peptide_chains[chain_id] = str(pp[0].get_sequence())

        # loop over residues in chain
        for residue in chain:
            resn = residue.get_resname()
            isPhosphoResidue = resn in phospho_resnames
            if isPhosphoResidue:
                # Chain contains a phospho-residue; tag as phosphateChain
                isPhosphateChain = True
                phosphate_chains.add(chain_id)

            # process atoms only if residue is not water and is part of a
            # peptide or phospho chain
            if residue.get_id()[0] == 'W' or not (isPeptideChain or isPhosphateChain):
                continue

            resi = residue.get_id()[1]
            inscode = residue.get_id()[2].strip()
            hasPhosphate = False
            # Remember where this residue's rows start so they can be
            # dropped again. Slicing with -len(residue) would wipe the whole
            # list for a residue without atoms (d[:-0] == []).
            first_row = len(d)
            for atom in residue:
                vdw = -1
                if isPeptideChain:
                    elem = atom.element
                    if elem:
                        # normalise two-letter symbols, e.g. 'SE' -> 'Se'
                        elem = elem if len(elem) == 1 else elem[0] + elem[1].lower()
                        vdw = self._periodicTable.GetRvdw(
                            self._periodicTable.GetAtomicNumber(elem))
                coords = atom.get_coord()
                sn = atom.get_serial_number()
                # append to dataframe
                d.append((chain_id.strip(), chain_idx, resn, resi, inscode, sn,
                          atom.get_name(), isPeptideChain, isPhosphateChain,
                          coords[0], coords[1], coords[2], vdw))
                if (isPhosphoResidue and atom.get_name().strip() == 'P'
                        and len(coords) == 3):
                    hasPhosphate = True

            if isPhosphoResidue and not hasPhosphate:
                # del residue because no annotated phosphate
                del d[first_row:]

    if len(d) == 0:
        raise Exception('No amino acids found.')

    # save list to dataframe
    # 'S' is the byte-string code; the equivalent 'a' alias is deprecated.
    # NOTE(review): chain_idx is an integer stored in a 1-byte string field,
    # which looks unintended for models with 10+ chains -- confirm before
    # changing, downstream code may rely on the current column type.
    data = np.zeros((len(d), ),
                    dtype=[('chain', 'S1'), ('chain_idx', 'S1'),
                           ('resn', 'S3'), ('resi', 'i4'),
                           ('inscode', 'S1'), ('sn', 'i4'), ('an', 'S4'),
                           ('peptideChain', 'b'), ('phosphateChain', 'b'),
                           ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
                           ('vdw', 'f4')])
    data[:] = d
    idf = pd.DataFrame(data)
    idf[['peptideChain', 'phosphateChain']] = idf[['peptideChain', 'phosphateChain']].astype('bool')

    self._df = idf
    self._peptideChains = peptide_chains
    self._phosphateChains = phosphate_chains
def extract_seq_from_models(protien_name, file_name, fasta_to_write):
    """Write the one-letter sequence of a PDB file to a FASTA file.

    Only the leading part of ``file_name`` (up to and including the first
    line that starts with ``T``) is copied to a temporary PDB file and
    parsed, which keeps parsing fast and memory use low for large multi-model
    files. The sequence of the last polypeptide built by ``CaPPBuilder``
    (CA-CA distance criterion) is written to ``fasta_to_write`` under the
    header ``>{protien_name}_seq``. An existing output file is overwritten.

    :param protien_name: structure id, also used in the FASTA header
    :param file_name: path of the input PDB file
    :param fasta_to_write: path of the FASTA file to create
    :raises ValueError: if no polypeptide could be built from the input
    """
    import tempfile

    print("Working with.....:  %s" % file_name)

    # A private temp file opened for writing replaces the fixed
    # "_temp_pdb.pdb" opened in append mode, which accumulated stale content
    # from earlier (failed) runs and collided between concurrent runs.
    fd, temp_pdb_file = tempfile.mkstemp(suffix=".pdb")
    try:
        with os.fdopen(fd, "w") as temp, open(file_name) as ip:
            for line in ip:
                temp.write(line)
                # NOTE(review): this stops at the first record starting with
                # 'T' (TER, but also TITLE) -- confirm the inputs never
                # carry a TITLE header before the coordinates.
                if line[0] == 'T':
                    break

        structure = PDBParser().get_structure(protien_name, temp_pdb_file)

        # Using CA-CA; the sequence of the last polypeptide wins.
        seq = None
        for pp in CaPPBuilder().build_peptides(structure):
            seq = pp.get_sequence()
    finally:
        # Always clean up, even when parsing fails.
        os.remove(temp_pdb_file)

    if seq is None:
        raise ValueError("No polypeptide found in %s" % file_name)

    # "w" truncates an existing file and, unlike os.remove() followed by
    # append, does not fail when the output does not exist yet.
    with open(fasta_to_write, "w") as fasta:
        fasta.write(">" + protien_name + "_seq\n")
        fasta.write(str(seq))

    print("Done, output file stored at:  %s" % fasta_to_write)
def __init__(self, model, radius=12.0, offset=0):
    """Compute the CA coordination number of every residue in ``model``.

    The exposure of a residue is the number of other CA atoms found closer
    than ``radius`` to its own CA atom. The counts are handed to
    ``AbstractPropertyMap.__init__`` keyed by ``(chain_id, res_id)`` and are
    also stored on each residue as ``xtra['EXP_CN']``.

    :param model: the model that contains the residues
    :type model: L{Model}

    :param radius: radius of the sphere (centred at the CA atom)
    :type radius: float

    :param offset: number of flanking residues that are ignored in the
                   calculation of the number of neighbors
    :type offset: int
    """
    assert offset >= 0
    polypeptides = CaPPBuilder().build_peptides(model)
    fs_map = {}
    fs_list = []
    fs_keys = []
    for chain_a in polypeptides:
        for idx_a, res_a in enumerate(chain_a):
            if not (is_aa(res_a) and res_a.has_id('CA')):
                continue
            ca_a = res_a['CA']
            count = 0
            for chain_b in polypeptides:
                same_chain = chain_a is chain_b
                for idx_b, res_b in enumerate(chain_b):
                    # sequence neighbours (and the residue itself) are skipped
                    if same_chain and abs(idx_a - idx_b) <= offset:
                        continue
                    if not (is_aa(res_b) and res_b.has_id('CA')):
                        continue
                    # atom - atom yields the distance between both atoms
                    if (res_b['CA'] - ca_a) < radius:
                        count += 1
            key = (res_a.get_parent().get_id(), res_a.get_id())
            # Fill the 3 data structures
            fs_map[key] = count
            fs_list.append((res_a, count))
            fs_keys.append(key)
            # Add to xtra
            res_a.xtra['EXP_CN'] = count
    AbstractPropertyMap.__init__(self, fs_map, fs_keys, fs_list)
def __init__(self, model, radius=12.0, offset=0):
    """Compute the CA coordination number of every residue in ``model``.

    The exposure of a residue is the number of other CA atoms found closer
    than ``radius`` to its own CA atom. The counts are handed to
    ``AbstractPropertyMap.__init__`` keyed by ``(chain_id, res_id)`` and are
    also stored on each residue as ``xtra['EXP_CN']``.

    :param model: the model that contains the residues
    :type model: L{Model}

    :param radius: radius of the sphere (centred at the CA atom)
    :type radius: float

    :param offset: number of flanking residues that are ignored in the
                   calculation of the number of neighbors
    :type offset: int
    """
    assert offset >= 0
    polypeptides = CaPPBuilder().build_peptides(model)
    fs_map = {}
    fs_list = []
    fs_keys = []
    for chain_a in polypeptides:
        for idx_a, res_a in enumerate(chain_a):
            if not (is_aa(res_a) and res_a.has_id('CA')):
                continue
            ca_a = res_a['CA']
            count = 0
            for chain_b in polypeptides:
                same_chain = chain_a is chain_b
                for idx_b, res_b in enumerate(chain_b):
                    # sequence neighbours (and the residue itself) are skipped
                    if same_chain and abs(idx_a - idx_b) <= offset:
                        continue
                    if not (is_aa(res_b) and res_b.has_id('CA')):
                        continue
                    # atom - atom yields the distance between both atoms
                    if (res_b['CA'] - ca_a) < radius:
                        count += 1
            key = (res_a.get_parent().get_id(), res_a.get_id())
            # Fill the 3 data structures
            fs_map[key] = count
            fs_list.append((res_a, count))
            fs_keys.append(key)
            # Add to xtra
            res_a.xtra['EXP_CN'] = count
    AbstractPropertyMap.__init__(self, fs_map, fs_keys, fs_list)
def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D',
             angle_key=None, check_chain_breaks=False, check_knots=False,
             receptor=None, signprot=None):
    """
    Compute half sphere exposure and, on top of it, collect atom clashes,
    chain breaks and possible knots.

    Results are stored on the instance: C{self.clash_pairs},
    C{self.chain_breaks} and C{self.remodel_resis}; HSE values are written
    to each residue's C{xtra} dictionary.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the
        calculation of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
    @type angle_key: string

    @param check_chain_breaks: if true, residue numbers of amino acids that
        are in the model but in none of the built polypeptides are stored in
        C{self.chain_breaks}
    @type check_chain_breaks: bool

    @param check_knots: if true, residues reported by
        C{PossibleKnots(receptor, signprot).get_resnums()} are checked against
        the upper half sphere and hits are stored in C{self.remodel_resis}
    @type check_knots: bool

    @param receptor: passed through to C{PossibleKnots}
    @param signprot: passed through to C{PossibleKnots}
    """
    assert(offset >= 0)

    def _clash_bfactor(residue):
        # Kept exactly as before: a B-factor printed with a single decimal is
        # re-parsed with a trailing zero, otherwise it is returned unchanged.
        # NOTE(review): for plain floats both branches give the same number.
        bfactor = residue['CA'].get_bfactor()
        if len(str(bfactor).split('.')[1]) == 1:
            return float(str(bfactor) + '0')
        return bfactor

    # For PyMOL visualization
    self.ca_cb_list = []
    ppb = CaPPBuilder()
    ppl = ppb.build_peptides(model)
    hse_map = {}
    hse_list = []
    hse_keys = []

    ### GP
    # Anything whose id is not 0 is treated as a container of models.
    if model.get_id() != 0:
        model = model[0]
    residues_in_pdb = []
    residues_with_proper_CA = []
    if check_chain_breaks:
        for chain in model:
            for res in chain:
                if is_aa(res):
                    residues_in_pdb.append(res.get_id()[1])
    self.clash_pairs = []
    self.chain_breaks = []
    self.remodel_resis = {}
    knot_resis = []
    if check_knots:
        knot_resis = PossibleKnots(receptor, signprot).get_resnums()

    for pp1 in ppl:
        last_index = len(pp1) - 1
        for i, r2 in enumerate(pp1):
            resnum = r2.get_id()[1]
            residues_with_proper_CA.append(resnum)
            r1 = pp1[i - 1] if i > 0 else None
            r3 = pp1[i + 1] if i < last_index else None
            # This method is provided by the subclasses to calculate HSE
            result = self._get_cb(r1, r2, r3)
            if result is None:
                # Missing atoms, or i==0, or i==len(pp1)-1
                continue
            pcb, angle = result
            hse_u = 0
            hse_d = 0
            ca2 = r2['CA'].get_vector()
            residue_up = []    ### GP
            residue_down = []  ### GP

            # Decide once per residue whether the ``offset`` rule applies.
            # This spells out what the former try/raise/bare-except block
            # computed for every residue pair: the rule is dropped as soon
            # as the residue numbering is not contiguous on the first side
            # that can be checked.
            if r1 is None:
                ignore_neighbours = True
            elif resnum - 1 != r1.get_id()[1]:
                ignore_neighbours = False
            elif r3 is None:
                ignore_neighbours = True
            else:
                ignore_neighbours = resnum + 1 == r3.get_id()[1]

            for pp2 in ppl:
                same_chain = pp1 is pp2
                for j, ro in enumerate(pp2):
                    if ignore_neighbours and same_chain and abs(i - j) <= offset:
                        # neighboring residues in the chain are ignored
                        continue
                    if not is_aa(ro) or not ro.has_id('CA'):
                        continue
                    d = ro['CA'].get_vector() - ca2
                    if d.norm() < radius:
                        if d.angle(pcb) < (math.pi / 2):
                            hse_u += 1
                            ### GP: residues found in the upper half sphere
                            residue_up.append(ro)
                        else:
                            hse_d += 1
                            ### GP: residues found in the lower half sphere
                            residue_down.append(ro)

            res_id = r2.get_id()
            chain_id = r2.get_parent().get_id()
            # Fill the 3 data structures
            # NOTE(review): unlike the plain HSE constructor, these three are
            # never handed to AbstractPropertyMap.__init__ -- confirm whether
            # that call is missing on purpose.
            hse_map[(chain_id, res_id)] = (hse_u, hse_d, angle)
            ### GP residue_up and residue_down added to hse_list
            hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
            hse_keys.append((chain_id, res_id))
            # Add to xtra
            r2.xtra[hse_up_key] = hse_u
            r2.xtra[hse_down_key] = hse_d
            if angle_key:
                r2.xtra[angle_key] = angle

            ### GP checking for knots
            if check_knots:
                for knot in knot_resis:
                    if knot[0][1] != resnum or knot[0][0] != chain_id:
                        continue
                    print(chain_id, r2)
                    knot_chain, knot_resnums = knot[1][0], knot[1][1]
                    for r in residue_up:
                        if r.get_parent().get_id() == knot_chain and r.get_id()[1] in knot_resnums:
                            print('close: ', r.get_parent().get_id(), r)
                            resi_range = [knot_resnums[0], knot_resnums[-1]]
                            ranges = self.remodel_resis.setdefault(knot_chain, [])
                            if resi_range not in ranges:
                                ranges.append(resi_range)

            ### GP checking for atom clashes
            # Direct sequence neighbours are skipped unless the numbering
            # jumps between them. For i == 0 the "previous" residue is
            # pp1[-1], exactly as the original index expression behaved.
            prev_res = pp1[i - 1]
            next_res = r3
            include_prev = resnum - 1 != prev_res.get_id()[1]
            include_next = next_res is not None and resnum + 1 != next_res.get_id()[1]
            for atom in r2:
                ref_vector = atom.get_vector()
                for other_res in residue_up:
                    if other_res == prev_res and not include_prev:
                        continue
                    if next_res is not None and other_res == next_res and not include_next:
                        continue
                    for other_atom in other_res:
                        d = other_atom.get_vector() - ref_vector
                        if d.norm() < 2:
                            self.clash_pairs.append([
                                (_clash_bfactor(r2), resnum),
                                (_clash_bfactor(other_res), other_res.get_id()[1]),
                            ])

    if check_chain_breaks:
        # set lookup instead of scanning the list for every residue
        proper_ca = set(residues_with_proper_CA)
        self.chain_breaks.extend(r for r in residues_in_pdb if r not in proper_ca)
def __init__(self, model, radius, offset, hse_up_key, hse_down_key, angle_key=None):
    """
    Count, for every residue, the CA atoms in the two half spheres around it.

    The half spheres are split by the plane perpendicular to the (pseudo)
    CB vector returned by C{self._get_cb}. Counts are stored in the
    residue's C{xtra} dictionary and passed to
    C{AbstractPropertyMap.__init__}.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the
        calculation of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
    @type angle_key: string
    """
    assert offset >= 0
    # For PyMOL visualization
    self.ca_cb_list = []
    polypeptides = CaPPBuilder().build_peptides(model)
    hse_map = {}
    hse_list = []
    hse_keys = []
    for chain_a in polypeptides:
        last_index = len(chain_a) - 1
        for idx_a, centre in enumerate(chain_a):
            before = chain_a[idx_a - 1] if idx_a != 0 else None
            after = chain_a[idx_a + 1] if idx_a != last_index else None
            # This method is provided by the subclasses to calculate HSE
            result = self._get_cb(before, centre, after)
            if result is None:
                # Missing atoms, or first/last residue of the polypeptide
                continue
            pcb, angle = result
            n_up = 0
            n_down = 0
            centre_ca = centre['CA'].get_vector()
            for chain_b in polypeptides:
                same_chain = chain_a is chain_b
                for idx_b, other in enumerate(chain_b):
                    if same_chain and abs(idx_a - idx_b) <= offset:
                        # neighboring residues in the chain are ignored
                        continue
                    if not (is_aa(other) and other.has_id('CA')):
                        continue
                    d = other['CA'].get_vector() - centre_ca
                    if d.norm() < radius:
                        if d.angle(pcb) < (pi / 2):
                            n_up += 1
                        else:
                            n_down += 1
            key = (centre.get_parent().get_id(), centre.get_id())
            # Fill the 3 data structures
            hse_map[key] = (n_up, n_down, angle)
            hse_list.append((centre, (n_up, n_down, angle)))
            hse_keys.append(key)
            # Add to xtra
            centre.xtra[hse_up_key] = n_up
            centre.xtra[hse_down_key] = n_down
            if angle_key:
                centre.xtra[angle_key] = angle
    AbstractPropertyMap.__init__(self, hse_map, hse_keys, hse_list)
def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D',
             angle_key=None, check_chain_breaks=False, check_knots=False,
             receptor=None, signprot=None):
    """
    Compute half sphere exposure and, on top of it, collect atom clashes,
    chain breaks and possible knots.

    Results are stored on the instance: C{self.clash_pairs},
    C{self.chain_breaks} and C{self.remodel_resis}; HSE values are written
    to each residue's C{xtra} dictionary.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the
        calculation of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
    @type angle_key: string

    @param check_chain_breaks: if true, residue numbers of amino acids that
        are in the model but in none of the built polypeptides are stored in
        C{self.chain_breaks}
    @type check_chain_breaks: bool

    @param check_knots: if true, residues reported by
        C{PossibleKnots(receptor, signprot).get_resnums()} are checked against
        the upper half sphere and hits are stored in C{self.remodel_resis}
    @type check_knots: bool

    @param receptor: passed through to C{PossibleKnots}
    @param signprot: passed through to C{PossibleKnots}
    """
    assert(offset >= 0)

    def _clash_bfactor(residue):
        # Kept exactly as before: a B-factor printed with a single decimal is
        # re-parsed with a trailing zero, otherwise it is returned unchanged.
        # NOTE(review): for plain floats both branches give the same number.
        bfactor = residue['CA'].get_bfactor()
        if len(str(bfactor).split('.')[1]) == 1:
            return float(str(bfactor) + '0')
        return bfactor

    # For PyMOL visualization
    self.ca_cb_list = []
    ppb = CaPPBuilder()
    ppl = ppb.build_peptides(model)
    hse_map = {}
    hse_list = []
    hse_keys = []

    ### GP
    # Anything whose id is not 0 is treated as a container of models.
    if model.get_id() != 0:
        model = model[0]
    residues_in_pdb = []
    residues_with_proper_CA = []
    if check_chain_breaks:
        for chain in model:
            for res in chain:
                if is_aa(res):
                    residues_in_pdb.append(res.get_id()[1])
    self.clash_pairs = []
    self.chain_breaks = []
    self.remodel_resis = {}
    knot_resis = []
    if check_knots:
        knot_resis = PossibleKnots(receptor, signprot).get_resnums()

    for pp1 in ppl:
        last_index = len(pp1) - 1
        for i, r2 in enumerate(pp1):
            resnum = r2.get_id()[1]
            residues_with_proper_CA.append(resnum)
            r1 = pp1[i - 1] if i > 0 else None
            r3 = pp1[i + 1] if i < last_index else None
            # This method is provided by the subclasses to calculate HSE
            result = self._get_cb(r1, r2, r3)
            if result is None:
                # Missing atoms, or i==0, or i==len(pp1)-1
                continue
            pcb, angle = result
            hse_u = 0
            hse_d = 0
            ca2 = r2['CA'].get_vector()
            residue_up = []    ### GP
            residue_down = []  ### GP

            # Decide once per residue whether the ``offset`` rule applies.
            # This spells out what the former try/raise/bare-except block
            # computed for every residue pair: the rule is dropped as soon
            # as the residue numbering is not contiguous on the first side
            # that can be checked.
            if r1 is None:
                ignore_neighbours = True
            elif resnum - 1 != r1.get_id()[1]:
                ignore_neighbours = False
            elif r3 is None:
                ignore_neighbours = True
            else:
                ignore_neighbours = resnum + 1 == r3.get_id()[1]

            for pp2 in ppl:
                same_chain = pp1 is pp2
                for j, ro in enumerate(pp2):
                    if ignore_neighbours and same_chain and abs(i - j) <= offset:
                        # neighboring residues in the chain are ignored
                        continue
                    if not is_aa(ro) or not ro.has_id('CA'):
                        continue
                    d = ro['CA'].get_vector() - ca2
                    if d.norm() < radius:
                        if d.angle(pcb) < (math.pi / 2):
                            hse_u += 1
                            ### GP: residues found in the upper half sphere
                            residue_up.append(ro)
                        else:
                            hse_d += 1
                            ### GP: residues found in the lower half sphere
                            residue_down.append(ro)

            res_id = r2.get_id()
            chain_id = r2.get_parent().get_id()
            # Fill the 3 data structures
            # NOTE(review): unlike the plain HSE constructor, these three are
            # never handed to AbstractPropertyMap.__init__ -- confirm whether
            # that call is missing on purpose.
            hse_map[(chain_id, res_id)] = (hse_u, hse_d, angle)
            ### GP residue_up and residue_down added to hse_list
            hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
            hse_keys.append((chain_id, res_id))
            # Add to xtra
            r2.xtra[hse_up_key] = hse_u
            r2.xtra[hse_down_key] = hse_d
            if angle_key:
                r2.xtra[angle_key] = angle

            ### GP checking for knots
            if check_knots:
                for knot in knot_resis:
                    if knot[0][1] != resnum or knot[0][0] != chain_id:
                        continue
                    print(chain_id, r2)
                    knot_chain, knot_resnums = knot[1][0], knot[1][1]
                    for r in residue_up:
                        if r.get_parent().get_id() == knot_chain and r.get_id()[1] in knot_resnums:
                            print('close: ', r.get_parent().get_id(), r)
                            resi_range = [knot_resnums[0], knot_resnums[-1]]
                            ranges = self.remodel_resis.setdefault(knot_chain, [])
                            if resi_range not in ranges:
                                ranges.append(resi_range)

            ### GP checking for atom clashes
            # Direct sequence neighbours are skipped unless the numbering
            # jumps between them. For i == 0 the "previous" residue is
            # pp1[-1], exactly as the original index expression behaved.
            prev_res = pp1[i - 1]
            next_res = r3
            include_prev = resnum - 1 != prev_res.get_id()[1]
            include_next = next_res is not None and resnum + 1 != next_res.get_id()[1]
            for atom in r2:
                ref_vector = atom.get_vector()
                for other_res in residue_up:
                    if other_res == prev_res and not include_prev:
                        continue
                    if next_res is not None and other_res == next_res and not include_next:
                        continue
                    for other_atom in other_res:
                        d = other_atom.get_vector() - ref_vector
                        if d.norm() < 2:
                            self.clash_pairs.append([
                                (_clash_bfactor(r2), resnum),
                                (_clash_bfactor(other_res), other_res.get_id()[1]),
                            ])

    if check_chain_breaks:
        # set lookup instead of scanning the list for every residue
        proper_ca = set(residues_with_proper_CA)
        self.chain_breaks.extend(r for r in residues_in_pdb if r not in proper_ca)
def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D',
             angle_key=None):
    """
    Compute half sphere exposure and record atom clashes with residues of
    the upper half sphere in C{self.clash_pairs}.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the
        calculation of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
    @type angle_key: string
    """
    assert (offset >= 0)
    # For PyMOL visualization
    self.ca_cb_list = []
    ppb = CaPPBuilder()
    ppl = ppb.build_peptides(model)
    hse_map = {}
    hse_list = []
    hse_keys = []
    ### GP
    self.clash_pairs = []
    for pp1 in ppl:
        last_index = len(pp1) - 1
        for i, r2 in enumerate(pp1):
            r1 = pp1[i - 1] if i > 0 else None
            r3 = pp1[i + 1] if i < last_index else None
            # This method is provided by the subclasses to calculate HSE
            result = self._get_cb(r1, r2, r3)
            if result is None:
                # Missing atoms, or i==0, or i==len(pp1)-1
                continue
            pcb, angle = result
            hse_u = 0
            hse_d = 0
            ca2 = r2['CA'].get_vector()
            residue_up = []    ### GP
            residue_down = []  ### GP
            for pp2 in ppl:
                same_chain = pp1 is pp2
                for j, ro in enumerate(pp2):
                    if same_chain and abs(i - j) <= offset:
                        # neighboring residues in the chain are ignored
                        continue
                    if not is_aa(ro) or not ro.has_id('CA'):
                        continue
                    d = ro['CA'].get_vector() - ca2
                    if d.norm() < radius:
                        if d.angle(pcb) < (math.pi / 2):
                            hse_u += 1
                            ### GP: residues found in the upper half sphere
                            residue_up.append(ro)
                        else:
                            hse_d += 1
                            ### GP: residues found in the lower half sphere
                            residue_down.append(ro)
            res_id = r2.get_id()
            chain_id = r2.get_parent().get_id()
            # Fill the 3 data structures
            # NOTE(review): these are built but never passed to
            # AbstractPropertyMap.__init__ here -- confirm this is intended.
            hse_map[(chain_id, res_id)] = (hse_u, hse_d, angle)
            ### GP residue_up and residue_down added to hse_list
            hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
            hse_keys.append((chain_id, res_id))
            # Add to xtra
            r2.xtra[hse_up_key] = hse_u
            r2.xtra[hse_down_key] = hse_d
            if angle_key:
                r2.xtra[angle_key] = angle

            ### GP checking for atom clashes
            # The former blanket ``except: pass`` hid every error. The only
            # one it was needed for is the IndexError on pp1[i + 1] at the
            # last residue, which made that residue contribute no clash
            # pairs. That outcome is kept, but stated explicitly.
            # NOTE(review): confirm the C-terminal residue should really be
            # exempt from the clash check.
            if i + 1 > last_index:
                continue
            prev_res = pp1[i - 1]  # wraps to the last residue when i == 0
            next_res = pp1[i + 1]
            res_num = r2.get_id()[1]
            for atom in r2:
                ref_vector = atom.get_vector()
                for other_res in residue_up:
                    if other_res != prev_res and other_res != next_res:
                        for other_atom in other_res:
                            d = other_atom.get_vector() - ref_vector
                            if d.norm() < 2:
                                self.clash_pairs.append([
                                    (r2['CA'].get_bfactor(), res_num),
                                    (other_res['CA'].get_bfactor(), other_res.get_id()[1]),
                                ])
def __init__(self, model, radius, offset, hse_up_key, hse_down_key, angle_key=None):
    """
    Count, for every residue, the CA atoms in the two half spheres around it.

    The half spheres are split by the plane perpendicular to the (pseudo)
    CB vector returned by C{self._get_cb}. Counts are stored in the
    residue's C{xtra} dictionary and passed to
    C{AbstractPropertyMap.__init__}.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the
        calculation of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
    @type angle_key: string
    """
    assert offset >= 0
    # For PyMOL visualization
    self.ca_cb_list = []
    polypeptides = CaPPBuilder().build_peptides(model)
    hse_map = {}
    hse_list = []
    hse_keys = []
    for chain_a in polypeptides:
        last_index = len(chain_a) - 1
        for idx_a, centre in enumerate(chain_a):
            before = chain_a[idx_a - 1] if idx_a != 0 else None
            after = chain_a[idx_a + 1] if idx_a != last_index else None
            # This method is provided by the subclasses to calculate HSE
            result = self._get_cb(before, centre, after)
            if result is None:
                # Missing atoms, or first/last residue of the polypeptide
                continue
            pcb, angle = result
            n_up = 0
            n_down = 0
            centre_ca = centre['CA'].get_vector()
            for chain_b in polypeptides:
                same_chain = chain_a is chain_b
                for idx_b, other in enumerate(chain_b):
                    if same_chain and abs(idx_a - idx_b) <= offset:
                        # neighboring residues in the chain are ignored
                        continue
                    if not (is_aa(other) and other.has_id('CA')):
                        continue
                    d = other['CA'].get_vector() - centre_ca
                    if d.norm() < radius:
                        if d.angle(pcb) < (pi / 2):
                            n_up += 1
                        else:
                            n_down += 1
            key = (centre.get_parent().get_id(), centre.get_id())
            # Fill the 3 data structures
            hse_map[key] = (n_up, n_down, angle)
            hse_list.append((centre, (n_up, n_down, angle)))
            hse_keys.append(key)
            # Add to xtra
            centre.xtra[hse_up_key] = n_up
            centre.xtra[hse_down_key] = n_down
            if angle_key:
                centre.xtra[angle_key] = angle
    AbstractPropertyMap.__init__(self, hse_map, hse_keys, hse_list)
class ContactMapper(FeaturesComputer):
    '''
    Extends FeaturesComputer class. Extracts res and chainIds for training and
    predicting and computes contact maps for training for a given complex
    '''

    def __init__(self, rFname, lFname, computedFeatsRootDir=None, boundAvailable=True,
                 res2res_dist=6.0, isForPrediction=False, statusManager=None):
        '''
        @param rFname: str. path to receptor pdb file
        @param lFname: str. path to ligand pdb file
        @param computedFeatsRootDir: str. path where features will be stored
        @param boundAvailable: bool. True if bound structures are available. False otherwise. Bound structures must be
                               located at the same path that unbound structures and need to be named as in the
                               following example:
                                 1A2K_l_u.pdb  1A2K_r_b.pdb
        @param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be considered as
                             interacting (Angstroms)
        @param isForPrediction: bool. False to compute contacts between amino acids (positive contacts are tagged
                                as 1, negative ones as -1). If True, no contacts are computed and all amino acid
                                pairs get np.nan as tag
        @param statusManager: class that implements .setStatus(msg) to communicate (not used by this constructor)
        @raise FeatureComputerException: if the names of the two files differ and the ligand prefix contains '<'
                                         or the receptor prefix contains '>'
        '''
        FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)
        # prefix = file name up to the first "." and the first "_"
        self.prefixR = os.path.split(rFname)[1].split(".")[0].split("_")[0]
        self.prefixL = os.path.split(lFname)[1].split(".")[0].split("_")[0]
        if self.prefixR == self.prefixL:
            self.prefix = self.prefixR
        else:
            # "<->" is used as separator below, so it must stay unambiguous
            if "<" in self.prefixL:
                raise FeatureComputerException(
                    "Error. Ligand pdbFile name %s must not contain '<' or '>' character" % lFname)
            if ">" in self.prefixR:
                raise FeatureComputerException(
                    "Error. Receptor pdbFile name %s must not contain '<' or'>' character" % rFname)
            self.prefixR = self.getExtendedPrefix(rFname)
            self.prefixL = self.getExtendedPrefix(lFname)
            self.prefix = self.prefixL + "<->" + self.prefixR

        self.isForPrediction = isForPrediction
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable
        self.outPath = myMakeDir(self.computedFeatsRootDir, "common/contactMaps")
        self.outName = os.path.join(self.outPath, self.prefix + ".cMap.tab")
        self.parser = PDBParser(QUIET=True)
        self.ppb = CaPPBuilder()
        self.computeFun = self.contactMapOneComplex

    def mapBoundToUnbound(self, structureUnbound, structureBound, skipBoundChainsIds=None):
        '''
        Obtains correspondence between unbound structure and bound structure when available. Returns a dictionary
        that maps bound_residue --> equivalent unbound_residue
        @param structureUnbound: Bio.PDB.Structure. Structure in unbound state
        @param structureBound: Bio.PDB.Structure or None. Structure in bound state. If None, the unbound structure
                               is mapped onto itself
        @param skipBoundChainsIds: Set of Chars or None. Set of chain ids that will be skipped for calculations.
        @return bound2UnboundMapDict: Dict {Bio.PDB.Residue (from bound structure): Bio.PDB.Residue (from unbound structure)}
        '''
        if skipBoundChainsIds is None:
            skipBoundChainsIds = ()
        bound2UnboundMapDict = {}
        pp_list_unbound = self.ppb.build_peptides(structureUnbound, aa_only=False)
        if structureBound is None:
            # if there is no bound structure, use just unbound: every residue maps to itself
            boundToUnboundMap = lambda x: x
            pp_list_bound = pp_list_unbound
        else:
            pp_list_bound = self.ppb.build_peptides(structureBound, aa_only=False)
            # res_bound->res_unbound mapper object
            mapper = BoundUnboundMapper(pp_list_unbound, pp_list_bound)
            mapper.build_correspondence()
            # For a given bound residue will return its unbound equivalent
            boundToUnboundMap = mapper.mapBoundToUnbound

        for pp in pp_list_bound:
            for resBound in pp:
                chainBound = resBound.get_full_id()[2]  # str chainId
                if chainBound in skipBoundChainsIds:
                    continue
                resUnbound = boundToUnboundMap(resBound)
                # None means there is no equivalent unbound residue for this bound residue
                if resUnbound is not None:
                    bound2UnboundMapDict[resBound] = resUnbound
        return bound2UnboundMapDict

    def fixHomooligomers(self, structureL, structureR, positiveContacts, chainsInContactL, chainsInContactR):
        '''
        For each interacting pair of residues (resL_1, resR_2), it will add to positiveContacts
        (res_1L', resR_2) and/or (resL_1, resR_2') where resL_1' is an equivalent residue in homooligomers of ligand
        @param structureL: Bio.PDB.Structure. Structure of ligand
        @param structureR: Bio.PDB.Structure. Structure of receptor
        @param positiveContacts: [(ligandResId, receptorResId)]: ligandResId and receptorResIds are full_ids of
                                 Bio.PDB.Residue
        @param chainsInContactL: chain ids of the ligand that take part in contacts (replaced by the value
                                 computed by HomoOligomerFinder)
        @param chainsInContactR: chain ids of the receptor that take part in contacts (replaced by the value
                                 computed by HomoOligomerFinder)
        @return positiveContacts, chainsInContactL, chainsInContactR. Updated with equivalent residues
                interactions added
        '''
        pp_list_l = self.ppb.build_peptides(structureL, aa_only=False)
        equivalentLmapper = HomoOligomerFinder(pp_list_l, positiveContacts, chainType="l")
        positiveContacts, chainsInContactL = equivalentLmapper.update_interactions()

        pp_list_r = self.ppb.build_peptides(structureR, aa_only=False)
        equivalentRmapper = HomoOligomerFinder(pp_list_r, positiveContacts, chainType="r")
        positiveContacts, chainsInContactR = equivalentRmapper.update_interactions()
        return positiveContacts, chainsInContactL, chainsInContactR

    def getPairsOfResiduesInContact(self, structureL, structureR):
        '''
        Computes which amino acids of ligand are in contact with which amino acids of receptor
        @param structureL: Bio.PDB.Structure. Structure of ligand (bound state if available)
        @param structureR: Bio.PDB.Structure. Structure of receptor (bound state if available).
        @return positiveContacts: Set {(full_id of ligand residue, full_id of receptor residue)}
        @return chainsNotContactL: Set {str(chainId structureL)}. Ligand chains without any contact
        @return chainsNotContactR: Set {str(chainId structureR)}. Receptor chains without any contact
        @raise NoValidPDBFile: if one of the structures contains no model
        '''
        # Only non-hydrogen atoms of the first model are considered
        try:
            atomListL = [atom for atom in structureL.child_list[0].get_atoms()
                         if not atom.name.startswith("H")]
        except IndexError:
            raise NoValidPDBFile("Problems parsing pdbFile 1")
        try:
            atomListR = [atom for atom in structureR.child_list[0].get_atoms()
                         if not atom.name.startswith("H")]
        except IndexError:
            raise NoValidPDBFile("Problems parsing pdbFile 2")

        searcher = NeighborSearch(atomListL + atomListR)
        allNeigs = searcher.search_all(self.res2res_dist, level="R")

        lStructId = structureL.get_id()
        rStructId = structureR.get_id()
        positiveContacts = set()
        chainsInContactL = set()
        chainsInContactR = set()
        for res1, res2 in allNeigs:
            fullResId1 = res1.get_full_id()
            fullResId2 = res2.get_full_id()
            pdbId1 = fullResId1[0]
            pdbId2 = fullResId2[0]
            # keep only ligand-receptor pairs, always stored as (ligand, receptor)
            if pdbId1 == lStructId and pdbId2 == rStructId:
                ligandId, receptorId = fullResId1, fullResId2
            elif pdbId1 == rStructId and pdbId2 == lStructId:
                ligandId, receptorId = fullResId2, fullResId1
            else:
                continue
            positiveContacts.add((ligandId, receptorId))
            chainsInContactL.add(ligandId[2])
            chainsInContactR.add(receptorId[2])

        if CONSIDER_HOMOOLIG_AS_POS:
            positiveContacts, chainsInContactL, chainsInContactR = self.fixHomooligomers(
                structureL, structureR, positiveContacts, chainsInContactL, chainsInContactR)

        allChainsL = set(elem.get_id() for elem in structureL[0].get_list())
        allChainsR = set(elem.get_id() for elem in structureR[0].get_list())
        chainsNotContactL = allChainsL.difference(chainsInContactL)
        chainsNotContactR = allChainsR.difference(chainsInContactR)
        return positiveContacts, chainsNotContactL, chainsNotContactR

    def contactMapOneComplex(self):
        '''
        Computes the contact map of a complex. Initial input for complex codification. Contact map is a file
        written at self.computedFeatsRootDir/common/contactMaps/ with name prefix.cMap.tab where prefix is either
        the common name of ligand and receptor pdb files or the ligand and receptor prefixes joined by "<->".
        If the file already exists nothing is computed.
        @return 0 if the contact map had already been computed, None otherwise
        @raise BadNumberOfResidues: if the number of mapped residues of a partner is not strictly between
                                    self.minNumResiduesPartner and self.maxNumResiduesPartner
        '''
        outName = self.outName
        print(outName)
        if os.path.isfile(outName):
            print('Already computed contact map')
            return 0

        lStructId = self.prefixL + "_l_u.pdb"
        rStructId = self.prefixR + "_r_u.pdb"
        structureL_u = self.parser.get_structure(lStructId, self.lFname)
        structureR_u = self.parser.get_structure(rStructId, self.rFname)

        structureL_b = None
        structureR_b = None
        if not (self.boundAvailable == False or self.isForPrediction):
            try:
                lStructId_b = self.prefix + "_l_b.pdb"
                rStructId_b = self.prefix + "_r_b.pdb"
                lFname_b = os.path.join(os.path.split(self.lFname)[0], lStructId_b)
                rFname_b = os.path.join(os.path.split(self.rFname)[0], rStructId_b)
                structureL_b = self.parser.get_structure(lStructId_b, lFname_b)
                structureR_b = self.parser.get_structure(rStructId_b, rFname_b)
            except IOError:
                # in this case there are just unbound pdbs available
                structureL_b = None
                structureR_b = None

        if self.isForPrediction:
            positiveContacts = None
            chainsNotContactR = set()
            chainsNotContactL = set()
        elif structureL_b is None or structureR_b is None:
            # No bound structures: compute contacts in unbound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_u, structureR_u)
        else:
            # Compute contacts in bound structures
            positiveContacts, chainsNotContactL, chainsNotContactR = self.getPairsOfResiduesInContact(
                structureL_b, structureR_b)

        if JUST_INTERACTING_CHAINS == False:
            chainsNotContactR = set()
            chainsNotContactL = set()

        rResDict = self.mapBoundToUnbound(structureR_u, structureR_b, skipBoundChainsIds=chainsNotContactR)
        lResDict = self.mapBoundToUnbound(structureL_u, structureL_b, skipBoundChainsIds=chainsNotContactL)
        nResiduesL = len(lResDict)
        nResiduesR = len(rResDict)
        if not (self.minNumResiduesPartner < nResiduesL < self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesL, "1")
        if not (self.minNumResiduesPartner < nResiduesR < self.maxNumResiduesPartner):
            raise BadNumberOfResidues(nResiduesR, "2")

        try:
            # The "with" block closes the file before tryToRemove runs on failure.
            with open(outName, "w") as outFile:
                outFile.write("chainIdL structResIdL resNameL chainIdR structResIdR resNameR categ\n")

                # The receptor side does not depend on the ligand residue, so it is
                # sorted and translated once instead of once per ligand residue.
                receptorRows = []
                for resR_bound in sorted(rResDict, key=lambda x: x.get_full_id()):
                    resR_unbound = rResDict[resR_bound]
                    try:
                        letraR = three_to_one(resR_unbound.resname)
                        if letraR != three_to_one(resR_bound.resname):
                            continue
                    except KeyError:
                        # residue name without one-letter code: skip
                        continue
                    pdbIdR, modelR, chainIdR, resIdR = resR_unbound.get_full_id()
                    if chainIdR == " ":
                        chainIdR = "*"
                    receptorRows.append(
                        (resR_bound.get_full_id(), chainIdR, self.makeStrResId(resIdR), letraR))

                for resL_bound in sorted(lResDict, key=lambda x: x.get_full_id()):
                    resL_unbound = lResDict[resL_bound]
                    pdbIdL, modelL, chainIdL, resIdL = resL_unbound.get_full_id()
                    resIdL = self.makeStrResId(resIdL)
                    try:
                        letraL = three_to_one(resL_unbound.resname)
                        if letraL != three_to_one(resL_bound.resname):
                            continue
                    except KeyError:
                        continue
                    if chainIdL == " ":
                        chainIdL = "*"
                    fullIdL = resL_bound.get_full_id()

                    for fullIdR, chainIdR, resIdR, letraR in receptorRows:
                        if self.isForPrediction:
                            categ = np.nan
                        elif (fullIdL, fullIdR) in positiveContacts:
                            categ = 1
                        else:
                            categ = -1
                        outFile.write("%s %s %s %s %s %s %s\n" %
                                      (chainIdL, resIdL, letraL, chainIdR, resIdR, letraR, categ))
        except (KeyboardInterrupt, Exception):
            # never leave a partially written contact map behind
            print("Exception happend computing %s" % outName)
            tryToRemove(outName)
            raise

    def makeStrResId(self, resId):
        '''
        Builds the string form of a residue id.
        @param resId: sequence whose first element is ignored, e.g. a Bio.PDB residue id (hetero flag, resNumber,
                      insertion code)
        @return str. The remaining elements concatenated, with surrounding whitespace stripped
        '''
        return "".join(str(elem) for elem in resId[1:]).strip()