def extract_seq_from_models(protien_name, file_name, fasta_to_write):
    """Write the one-letter sequence of the first model of a PDB file as FASTA.

    Only the leading part of ``file_name`` (up to and including the first
    ``TER`` or ``ENDMDL`` record) is copied to a scratch PDB file, so the
    parser never has to load every model of a large multi-model file.

    :param protien_name: identifier used for the parsed structure and for the
        FASTA header (``>{protien_name}_seq``)
    :param file_name: path of the PDB file to read
    :param fasta_to_write: path of the FASTA file to create; an existing file
        is overwritten
    :raises ValueError: if no polypeptide can be built from the first model
    """
    # Single-argument print() produces the same text on Python 2 and 3.
    print("Working with.....:  " + str(file_name))

    temp_pdb_file = "_temp_pdb.pdb"
    try:
        # "w" (not "a"): a scratch file left behind by a crashed run must not
        # be prepended to this model.
        with open(temp_pdb_file, "w") as temp, open(file_name) as ip:
            for line in ip:
                temp.write(line)
                # Match the whole record name: testing only the first letter
                # would also stop at TITLE/TURN records, before any atom.
                if line.startswith(("TER", "ENDMDL")):
                    break
        structure = PDBParser().get_structure(protien_name, temp_pdb_file)
    finally:
        # Always clean up the scratch file, even if parsing failed.
        if os.path.exists(temp_pdb_file):
            os.remove(temp_pdb_file)

    # Using CA-CA
    seq = None
    for pp in CaPPBuilder().build_peptides(structure):
        # NOTE(review): only the last polypeptide is kept, as in the original
        # code; confirm whether fragments should be concatenated instead.
        seq = pp.get_sequence()
    if seq is None:
        raise ValueError("No polypeptide could be built from %s" % file_name)

    # Opening with "w" replaces any previous output without requiring the
    # file to exist beforehand.
    with open(fasta_to_write, "w") as fasta:
        fasta.write(">" + protien_name + "_seq\n")
        fasta.write(str(seq))

    print("Done, output file stored at:  " + str(fasta_to_write))
def __init__(self, model, radius=12.0, offset=0):
    """Compute the CA coordination number of every residue in ``model``.

    The exposure of a residue is the number of other CA atoms found strictly
    closer than ``radius`` to its own CA atom. The counts are passed to
    ``AbstractPropertyMap`` keyed by ``(chain_id, res_id)`` and are also
    stored on each residue under ``xtra['EXP_CN']``.

    :param model: the model that contains the residues
    :type model: L{Model}
    :param radius: radius of the sphere (centred at the CA atom)
    :type radius: float
    :param offset: number of flanking residues that are ignored in the
        calculation of the number of neighbors
    :type offset: int
    """
    assert offset >= 0
    peptides = CaPPBuilder().build_peptides(model)
    fs_map, fs_list, fs_keys = {}, [], []

    for centre_pp in peptides:
        for i, centre in enumerate(centre_pp):
            if not (is_aa(centre) and centre.has_id('CA')):
                continue
            centre_ca = centre['CA']

            count = 0
            for other_pp in peptides:
                for j, other in enumerate(other_pp):
                    # Flanking residues of the same peptide are ignored.
                    if centre_pp is other_pp and abs(i - j) <= offset:
                        continue
                    if not (is_aa(other) and other.has_id('CA')):
                        continue
                    # Subtracting two atoms yields their distance.
                    if (other['CA'] - centre_ca) < radius:
                        count += 1

            res_id = centre.get_id()
            chain_id = centre.get_parent().get_id()
            key = (chain_id, res_id)
            # Fill the 3 data structures
            fs_map[key] = count
            fs_list.append((centre, count))
            fs_keys.append(key)
            # Add to xtra
            centre.xtra['EXP_CN'] = count

    AbstractPropertyMap.__init__(self, fs_map, fs_keys, fs_list)
def __init__(self, rFname, lFname, computedFeatsRootDir=None,
             boundAvailable=True, res2res_dist=6.0, isForPrediction=False,
             statusManager=None):
    '''
    Set up the contact-map computer for one receptor/ligand pair.

    @param rFname: str. path to receptor pdb file
    @param lFname: str. path to ligand pdb file
    @param computedFeatsRootDir: str. path where features will be stored
    @param boundAvailable: bool. True if bound structures are available,
                False otherwise. Bound structures must be located at the
                same path as the unbound structures and need to be named as
                in the following example: 1A2K_l_u.pdb 1A2K_r_b.pdb
    @param res2res_dist: float. max distance between any heavy atoms of 2
                amino acids to be considered as interacting (Angstroms)
    @param isForPrediction: bool. False to compute contacts between amino
                acids, True otherwise. Positive contacts will be tagged as
                1, negative as -1. If True, all amino acids will have
                np.nan as tag
    @param statusManager: class that implements .setStatus(msg) to
                communicate (not used inside this constructor)
    '''
    FeaturesComputer.__init__(self, rFname, lFname, computedFeatsRootDir)

    # Short prefix: file name up to the first "." and then the first "_".
    self.prefixR = os.path.basename(rFname).split(".")[0].split("_")[0]
    self.prefixL = os.path.basename(lFname).split(".")[0].split("_")[0]

    if self.prefixR != self.prefixL:
        # Partners come from different files: the combined prefix is built
        # around "<->", so the separator characters are rejected.
        if "<" in self.prefixL:
            raise FeatureComputerException(
                "Error. Ligand pdbFile name %s must not contain '<' or '>' character" % lFname)
        if ">" in self.prefixR:
            raise FeatureComputerException(
                "Error. Receptor pdbFile name %s must not contain '<' or'>' character" % rFname)
        self.prefixR = self.getExtendedPrefix(rFname)
        self.prefixL = self.getExtendedPrefix(lFname)
        self.prefix = "<->".join((self.prefixL, self.prefixR))
    else:
        self.prefix = self.prefixR

    self.isForPrediction = isForPrediction
    self.res2res_dist = res2res_dist
    self.boundAvailable = boundAvailable

    contactMapsDir = myMakeDir(self.computedFeatsRootDir, "common/contactMaps")
    self.outPath = contactMapsDir
    self.outName = os.path.join(contactMapsDir, self.prefix + ".cMap.tab")

    self.parser = PDBParser(QUIET=True)
    # CA-based builder; the earlier PPBuilder(radius=200) variant, used to
    # not worry about broken chains, is no longer active.
    self.ppb = CaPPBuilder()
    self.computeFun = self.contactMapOneComplex
def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D', angle_key=None, check_chain_breaks=False, check_knots=False, receptor=None, signprot=None):
    """
    Compute half-sphere exposure (HSE) for every residue and, optionally,
    collect chain breaks, possible knots and atom clashes.

    Results are stored on the instance (C{clash_pairs}, C{chain_breaks},
    C{remodel_resis} when C{check_knots} is set) and in each residue's
    C{xtra} dictionary.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the calculation
    of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB in
    the entity.xtra attribute
    @type angle_key: string

    @param check_chain_breaks: if True, residue numbers of amino acids that
    are in the model but in none of the built peptides are stored in
    C{self.chain_breaks}
    @type check_chain_breaks: bool

    @param check_knots: if True, C{PossibleKnots(receptor, signprot)} is
    queried and matching residue ranges are stored in C{self.remodel_resis}
    @type check_knots: bool

    @param receptor: passed to C{PossibleKnots}; only used with check_knots
    @param signprot: passed to C{PossibleKnots}; only used with check_knots
    """
    assert(offset>=0)
    # For PyMOL visualization
    self.ca_cb_list=[]
    ppb=CaPPBuilder()
    # Peptides are built before `model` is possibly narrowed below.
    ppl=ppb.build_peptides(model)
    hse_map={}
    hse_list=[]
    hse_keys=[]
    ### GP
    # Anything whose id is not 0 is replaced by its child with id 0
    # (presumably a Structure being narrowed to its first model - confirm).
    if model.get_id()!=0:
        model = model[0]
    residues_in_pdb,residues_with_proper_CA=[],[]
    if check_chain_breaks==True:
        # Collect the sequence number of every amino acid in the model.
        # for m in model:
        for chain in model:
            for res in chain:
                # try:
                if is_aa(res):
                    residues_in_pdb.append(res.get_id()[1])
                # except:
                #     if is_aa(chain):
                #         residues_in_pdb.append(chain.get_id()[1])
                #     print('chain', chain, res)
                #     break
    self.clash_pairs = []
    self.chain_breaks = []
    if check_knots:
        possible_knots = PossibleKnots(receptor, signprot)
        knot_resis = possible_knots.get_resnums()
        # NOTE(review): the block layout was reconstructed from a
        # whitespace-collapsed source; confirm that remodel_resis is only
        # initialised when check_knots is set.
        self.remodel_resis = {}
    for pp1 in ppl:
        for i in range(0, len(pp1)):
            # Every residue that made it into a peptide counts as "proper".
            residues_with_proper_CA.append(pp1[i].get_id()[1])
            if i==0:
                r1=None
            else:
                r1=pp1[i-1]
            r2=pp1[i]
            if i==len(pp1)-1:
                r3=None
            else:
                r3=pp1[i+1]
            # This method is provided by the subclasses to calculate HSE
            result=self._get_cb(r1, r2, r3)
            if result is None:
                # Missing atoms, or i==0, or i==len(pp1)-1
                continue
            pcb, angle=result
            hse_u=0
            hse_d=0
            ca2=r2['CA'].get_vector()
            residue_up=[] ### GP
            residue_down=[] ### GP
            for pp2 in ppl:
                for j in range(0, len(pp2)):
                    # Exception-driven branch: when r2 is numbered
                    # consecutively with both r1 and r3, an Exception is
                    # raised on purpose so that the handler applies the
                    # `offset` filter. The handler is also reached when r1
                    # or r3 is None (the get_id() call fails). Only when a
                    # numbering gap is detected is the filter skipped.
                    try:
                        if r2.get_id()[1]-1!=r1.get_id()[1] or r2.get_id()[1]+1!=r3.get_id()[1]:
                            pass
                        else:
                            raise Exception
                    except:
                        if pp1 is pp2 and abs(i-j)<=offset:
                            # neighboring residues in the chain are ignored
                            continue
                    ro=pp2[j]
                    if not is_aa(ro) or not ro.has_id('CA'):
                        continue
                    cao=ro['CA'].get_vector()
                    d=(cao-ca2)
                    if d.norm()<radius:
                        if d.angle(pcb)<(math.pi/2):
                            hse_u+=1
                            ### GP
                            # Keep the residues found in the upper half sphere
                            residue_up.append(ro)
                            ### end of GP code
                        else:
                            hse_d+=1
                            ### GP
                            # Keep the residues found in the lower half sphere
                            residue_down.append(ro)
                            ### end of GP code
            res_id=r2.get_id()
            chain_id=r2.get_parent().get_id()
            # Fill the 3 data structures
            # NOTE(review): hse_map, hse_list and hse_keys are filled but not
            # used again in this method.
            hse_map[(chain_id, res_id)]=(hse_u, hse_d, angle)
            hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle))) ### GP residue_up and residue_down added to hse_list
            hse_keys.append((chain_id, res_id))
            # Add to xtra
            r2.xtra[hse_up_key]=hse_u
            r2.xtra[hse_down_key]=hse_d
            if angle_key:
                r2.xtra[angle_key]=angle

            ### GP checking for knots
            # Each knot is indexed as knot[0] = (chain id, residue number)
            # and knot[1] = (chain id, sequence of residue numbers).
            if check_knots:
                for knot in knot_resis:
                    if knot[0][1]==pp1[i].get_id()[1] and knot[0][0]==pp1[i].get_parent().get_id():
                        print(pp1[i].get_parent().get_id(),pp1[i])
                        for r in residue_up:
                            if r.get_parent().get_id()==knot[1][0] and r.get_id()[1] in knot[1][1]:
                                print('close: ', r.get_parent().get_id(),r)
                                # Record the [first, last] residue number of
                                # the knot range once per chain.
                                resi_range = [knot[1][1][0], knot[1][1][-1]]
                                if knot[1][0] not in self.remodel_resis:
                                    self.remodel_resis[knot[1][0]] = [resi_range]
                                else:
                                    if resi_range not in self.remodel_resis[knot[1][0]]:
                                        self.remodel_resis[knot[1][0]].append(resi_range)

            ### GP checking for atom clashes
            # include_prev/include_next become True when the neighbouring
            # entry of the peptide is not numbered consecutively, in which
            # case it is clash-checked like any other residue. For i==0 the
            # index i-1 wraps around to the last residue of the peptide; for
            # the last residue pp1[i+1] raises and the flag stays False.
            include_prev, include_next = False, False
            try:
                if pp1[i].get_id()[1]-1!=pp1[i-1].get_id()[1]:
                    include_prev = True
            except:
                include_prev = False
            try:
                if pp1[i].get_id()[1]+1!=pp1[i+1].get_id()[1]:
                    include_next = True
            except:
                include_next = False
            for atom in pp1[i]:
                ref_vector = atom.get_vector()
                for other_res in residue_up:
                    # Exception-driven branch: directly bonded neighbours are
                    # skipped via `continue`; every other residue (and any
                    # lookup error such as pp1[i+1] at the peptide end)
                    # lands in the handler, which does the distance test.
                    try:
                        if other_res==pp1[i-1] and include_prev==False:
                            continue
                        elif len(pp1)>=i+1 and other_res==pp1[i+1] and include_next==False:
                            continue
                        else:
                            raise Exception
                    except:
                        for other_atom in other_res:
                            other_vector = other_atom.get_vector()
                            d = other_vector-ref_vector
                            # Atoms closer than 2 (coordinate units) clash.
                            if d.norm()<2:
                                # The CA B-factor identifies each clash
                                # partner (presumably it carries a residue
                                # label - confirm). Appending '0' to a
                                # one-decimal string gives the same float.
                                if len(str(pp1[i]['CA'].get_bfactor()).split('.')[1])==1:
                                    clash_res1 = float(str(pp1[i]['CA'].get_bfactor())+'0')
                                else:
                                    clash_res1 = pp1[i]['CA'].get_bfactor()
                                if len(str(other_res['CA'].get_bfactor()).split('.')[1])==1:
                                    clash_res2 = float(str(other_res['CA'].get_bfactor())+'0')
                                else:
                                    clash_res2 = other_res['CA'].get_bfactor()
                                self.clash_pairs.append([(clash_res1, pp1[i].get_id()[1]), (clash_res2, other_res.get_id()[1])])
    if check_chain_breaks==True:
        # Amino acids of the model that are in no peptide mark chain breaks.
        for r in residues_in_pdb:
            if r not in residues_with_proper_CA:
                self.chain_breaks.append(r)
def __init__(self, model, radius, offset, hse_up_key, hse_down_key,
             angle_key=None):
    """
    Compute the half-sphere exposure (HSE) of every residue in the model.

    For each residue the neighbouring CA atoms within C{radius} are split
    into an "up" and a "down" count, depending on whether the CA-CA vector
    makes an angle below pi/2 with the direction returned by
    C{self._get_cb}. The counts (and the angle returned by C{_get_cb}) are
    passed to C{AbstractPropertyMap} and stored in each residue's C{xtra}.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the
    calculation of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra
    attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB
    in the entity.xtra attribute
    @type angle_key: string
    """
    assert offset >= 0
    # For PyMOL visualization
    self.ca_cb_list = []
    peptides = CaPPBuilder().build_peptides(model)
    hse_map, hse_list, hse_keys = {}, [], []

    for pp1 in peptides:
        last_index = len(pp1) - 1
        for i, r2 in enumerate(pp1):
            # Previous/next residue in the peptide, None at either end.
            r1 = pp1[i - 1] if i > 0 else None
            r3 = pp1[i + 1] if i < last_index else None
            # This method is provided by the subclasses to calculate HSE
            result = self._get_cb(r1, r2, r3)
            if result is None:
                # Missing atoms, or i==0, or i==len(pp1)-1
                continue
            pcb, angle = result
            ca2 = r2['CA'].get_vector()

            hse_u = hse_d = 0
            for pp2 in peptides:
                for j, ro in enumerate(pp2):
                    if pp1 is pp2 and abs(i - j) <= offset:
                        # neighboring residues in the chain are ignored
                        continue
                    if not (is_aa(ro) and ro.has_id('CA')):
                        continue
                    d = ro['CA'].get_vector() - ca2
                    if d.norm() < radius:
                        if d.angle(pcb) < (pi / 2):
                            hse_u += 1
                        else:
                            hse_d += 1

            res_id = r2.get_id()
            chain_id = r2.get_parent().get_id()
            key = (chain_id, res_id)
            # Fill the 3 data structures
            hse_map[key] = (hse_u, hse_d, angle)
            hse_list.append((r2, (hse_u, hse_d, angle)))
            hse_keys.append(key)
            # Add to xtra
            r2.xtra[hse_up_key] = hse_u
            r2.xtra[hse_down_key] = hse_d
            if angle_key:
                r2.xtra[angle_key] = angle

    AbstractPropertyMap.__init__(self, hse_map, hse_keys, hse_list)
def main(argv=None):  # IGNORE:C0111
    '''Command line options.

    Builds a table of atom distances between ligand atoms and nearby
    amino-acid residues for the PDB entries found under ``--pdbs``.

    Steps, as implemented below:

    1. Load (or compute and store in ``--pdbs_with_drug``) the list of PDB
       codes that contain at least one residue whose compound type is DRUG,
       COFACTOR, METAL, SUGAR, NUCLEOTIDE or LIPID.
    2. Load the hmmscan domain table given by ``--domains``; a reduced copy
       is cached next to it with a ``2`` suffix.
    3. For every PDB with a ligand that is not yet listed in
       ``--procesados``, append to ``--distances`` one tab-separated row per
       residue-atom/ligand-atom pair closer than ``--dist``.

    :param argv: optional extra arguments; they are appended to ``sys.argv``
        before parsing
    '''
    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument(
        "-v", "--verbose", dest="verbose", action="count",
        help="set verbosity level [default: %(default)s]")
    # parser.add_argument("-dir", "--structs_dir", required = True )
    parser.add_argument("-db", "--database_name", default='pdb')
    parser.add_argument("-host", "--db_host", default='127.0.0.1')
    parser.add_argument("--procesados",
                        default='/tmp/pdbs_dist_procesados.txt')
    parser.add_argument(
        "--domains", default='/data/databases/pdb/processed/dns_pdbs.tlb')
    parser.add_argument(
        "--seqs", default='/data/databases/pdb/processed/pdb_seq_res.fasta')
    parser.add_argument("--pdbs", default='/data/databases/pdb/')
    parser.add_argument(
        "--distances",
        default='/data/databases/pdb/processed/distances.tbl',
        help="Final output: table with atom distances between residues and ligands. Only for distances less than 'dist' parameter")
    # type=float: a value given on the command line would otherwise stay a
    # string and could not be compared with the computed distances.
    parser.add_argument("--dist", default=5, type=float)
    parser.add_argument(
        "--pdbs_with_drug",
        default='/data/databases/pdb/processed/pdbs_with_drug.txt',
        help="Output: list of PDB codes with an associated ligand")

    args = parser.parse_args()

    if not os.path.exists(args.pdbs):
        # The value lives on the parsed namespace, not on the parser.
        sys.stderr.write(
            "%s not found. Specify where is pdbs/divided directory" %
            args.pdbs)
        sys.exit(1)
    PDB_PATH = args.pdbs
    CONTACT_DIST = args.dist
    # The command-line value is honoured all the way down (it used to be
    # replaced by a hard-coded path further below).
    pdbs_with_drug_path = args.pdbs_with_drug
    # `or "."`: a bare file name has an empty dirname, i.e. the current dir.
    if not os.path.exists(os.path.dirname(pdbs_with_drug_path) or "."):
        sys.stderr.write("can't create %s. Set pdbs_with_drug correctly" %
                         pdbs_with_drug_path)
        sys.exit(1)
    if not os.path.exists(os.path.dirname(args.distances) or "."):
        sys.stderr.write("can't create %s. Set distances correctly" %
                         args.distances)
        sys.exit(1)

    pdbs_procesados_path = args.procesados
    print(
        "In %s the processed pdbs are kept, if the file is deleted, the process starts from scratch "
        % pdbs_procesados_path)
    print("Outputs: '%s' and '%s' " % (pdbs_with_drug_path, args.distances))

    # A set: it is only used for membership tests and for adding new codes.
    pdbs_procesados = set()
    if os.path.exists(pdbs_procesados_path):
        with open(pdbs_procesados_path) as handle:
            pdbs_procesados = {x.strip() for x in handle}

    pdbs_iterator = PDBsIterator(pdb_dir=args.pdbs)

    DNsPDBs = args.domains
    if not os.path.exists(DNsPDBs):
        seqs_from_pdb = args.seqs
        if not os.path.exists(seqs_from_pdb):
            sys.stderr.write(
                "%s does not exists and %s not found. Specify where it is." %
                (DNsPDBs, seqs_from_pdb))
            sys.exit(1)
        sys.stderr.write(
            "%s not found. You can create it with the following command: \n" %
            DNsPDBs)
        sys.stderr.write(
            "hmmscan --cut_tc --domtblout dns_pdbs.tlb --acc -o pdb_seq_res.hmm Pfam-A.hmm seqs_from_pdb.fasta"
        )
        sys.exit(1)

    # Residue names grouped by compound type; frozensets give O(1) lookups
    # in the per-residue loops below.
    ligand_types = ("DRUG", "COFACTOR", "METAL", "SUGAR", "NUCLEOTIDE",
                    "LIPID")
    drugcompounds = frozenset(
        x for x, y in compound_type.items() if y in ligand_types)
    aminoacidcompounds = frozenset(
        x for x, y in compound_type.items() if y in ("MODIFIED", "RESIDUE"))

    _log.info("proceced pdbs: %i" % len(pdbs_procesados))

    p = PDBParser(PERMISSIVE=1, QUIET=1)

    pdbs_with_drug = set()
    if os.path.exists(pdbs_with_drug_path):
        _log.info("pdbs with drugs already loaded")
        with open(pdbs_with_drug_path) as handle:
            pdbs_with_drug = {x.strip() for x in handle}
    else:
        with open(pdbs_with_drug_path, "a") as handle:
            _log.info("pdbs with drugs will be loaded")
            for pdb, file_path in tqdm(list(pdbs_iterator)):
                if pdb in pdbs_with_drug:
                    continue
                try:
                    structure = p.get_structure(pdb, file_path)
                    if any(res.resname in drugcompounds
                           for res in structure.get_residues()):
                        pdbs_with_drug.add(pdb)
                        handle.write(pdb + "\n")
                        # Flush per entry so progress survives an abort.
                        handle.flush()
                except Exception as ex:
                    # Best effort: an unreadable entry is reported, skipped.
                    print(str(ex))

    def add_query_columns(frame):
        """Split ``query_name`` (pdb_chain_start_end) into four columns."""
        # Lists instead of map(): map() is lazy on Python 3 and cannot be
        # assigned to a DataFrame column.
        parts = [name.split("_") for name in frame["query_name"]]
        frame["pdb"] = [x[0].lower().strip() for x in parts]
        frame["chain"] = [x[1].upper().strip() for x in parts]
        frame["start_res"] = [x[2].upper().strip() for x in parts]
        frame["end_res"] = [x[3].upper().strip() for x in parts]

    hmm_cache_path = DNsPDBs + "2"
    if not os.path.exists(hmm_cache_path):
        cols = [
            "target_name", "accession", "tlen", "query_name", "accession2",
            "qlen", "E-value", "score1", "bias1", "#", "of", "c-Evalue",
            "i-Evalue", "score2", "bias2", "from1", "to1", "from2", "to2",
            "from3", "to3", "acc"
        ]
        _log.info("correcting hmmer-pdb output")
        regexp = re.compile(" +")
        items = []
        # `with` closes the table once it has been read.
        with open(DNsPDBs) as handle:
            for x in tqdm(handle.readlines()):
                if not x.startswith("#"):
                    items.append(regexp.split(x)[0:len(cols)])

        df_hmm = pd.DataFrame.from_records(items, columns=cols)
        # .copy(): the derived columns are added to an independent frame.
        df_hmm = df_hmm[["accession", "query_name", "from3", "to3"]].copy()
        # The cache holds only the four base columns.
        df_hmm.to_csv(hmm_cache_path)
    else:
        df_hmm = pd.read_csv(hmm_cache_path)
    add_query_columns(df_hmm)
    print(len(df_hmm))

    lock = Lock()

    def centeroid(arr):
        """Return the mean (x, y, z) of the ``coord`` of the items in arr."""
        length = len(arr)
        sum_x = np.sum([x.coord[0] for x in arr])
        sum_y = np.sum([x.coord[1] for x in arr])
        sum_z = np.sum([x.coord[2] for x in arr])
        return sum_x / length, sum_y / length, sum_z / length

    def residues_near_drug(drug_centroid, aa_residues):
        """Return the residues having an atom closer than 10 to the centroid.

        A residue is abandoned as soon as one of its atoms is farther than
        20 from the centroid.
        """
        centre = Struct(coord=drug_centroid)
        residues_near = []
        for r in aa_residues:
            for a in r:
                dist = a - centre
                if dist > 20:
                    break
                if dist < 10:
                    residues_near.append(r)
                    break
        return residues_near

    def juan(pdb_raw):
        """Process one PDB code and record it as processed, even on error."""
        try:
            pepe(pdb_raw)
        except Exception:
            traceback.print_exc()
        finally:
            with lock:
                # pdbs_procesados is a set (calling .append on the former
                # dict raised AttributeError here).
                pdbs_procesados.add(pdb_raw)
                with open(pdbs_procesados_path, "a") as handle:
                    handle.write(pdb_raw + "\n")

    def pepe(pdb):
        """Append the residue/ligand contact rows of one PDB entry."""
        parser_pdb = PDBParser(PERMISSIVE=1, QUIET=1)
        path = os.path.join(PDB_PATH, pdb[1:3].lower(),
                            "pdb" + pdb.lower() + ".ent")
        model = list(parser_pdb.get_structure('X', path))[0]

        for chain_obj in list(model):
            chain = chain_obj.id
            # residue number -> domain label, for residues inside a domain
            hmm_residues = {}
            pdb_seq = list(chain_obj.get_residues())
            if pdb_seq:
                hmms = df_hmm[(df_hmm["pdb"] == pdb)
                              & (df_hmm["chain"] == chain)
                              & (df_hmm["start_res"] == str(
                                  pdb_seq[0].id[1]))]
                for _, hmm in hmms.iterrows():
                    try:
                        # from3/to3 are 1-based; convert to list indices.
                        hmm_start = int(hmm["from3"]) - 1
                        hmm_end = int(hmm["to3"]) - 1
                        hmm_chain_name = "_".join(
                            map(str, [
                                hmm["accession"].split(".")[0], hmm["chain"],
                                pdb_seq[hmm_start].id[1],
                                pdb_seq[hmm_end].id[1]
                            ]))
                        # hmm_end is the last residue of the domain (it is
                        # part of the label above), so the slice must
                        # include it.
                        hmm_residues.update({
                            res.id[1]: hmm_chain_name
                            for res in pdb_seq[hmm_start:hmm_end + 1]
                        })
                    except IndexError:
                        print(pdb, hmm["accession"], hmm["chain"], hmm_start,
                              hmm_end, pdb_seq)

            aa_residues = []
            drug_molecules = []
            for res_obj in chain_obj.get_residues():
                if res_obj.resname in drugcompounds:
                    drug_molecules.append(res_obj)
                elif res_obj.resname in aminoacidcompounds:
                    aa_residues.append(res_obj)

            for res_drug_obj in drug_molecules:
                drug_centroid = centeroid(list(res_drug_obj))
                near_residues = residues_near_drug(drug_centroid, aa_residues)
                rows = []
                for drug_atom in res_drug_obj:
                    for near_residue in near_residues:
                        hmm_name = hmm_residues.get(near_residue.id[1],
                                                    "NoDn")
                        for residue_atom in near_residue:
                            distance = residue_atom - drug_atom
                            if distance > 20:
                                break
                            if distance < CONTACT_DIST:
                                fields = [
                                    pdb, chain, hmm_name, near_residue.id[1],
                                    near_residue.resname,
                                    residue_atom.serial_number,
                                    res_drug_obj.id[1], res_drug_obj.resname,
                                    drug_atom.serial_number, distance
                                ]
                                rows.append("\t".join(map(str, fields)) +
                                            "\n")
                # One append per ligand instead of reopening the file for
                # every contact; nothing is created when there are no rows.
                if rows:
                    with open(args.distances, "a") as handle:
                        handle.writelines(rows)

    _log.info("processing distances file")
    for x in tqdm(pdbs_with_drug):
        if x not in pdbs_procesados:
            juan(x)
    # pool = ThreadPool(1)
    # pool.map(juan, set(pdbs_with_drug) - set(pdbs_procesados))

    print("Finished!!!")
def pepe(pdb):
    """Append the residue/ligand contact rows of one PDB entry.

    For every chain of the first model, residues are labelled with the
    domain (from ``df_hmm``) they fall into, and one tab-separated row is
    appended to ``args.distances`` for each residue-atom/ligand-atom pair
    closer than ``CONTACT_DIST``.

    Relies on names from the enclosing scope: ``PDB_PATH``, ``df_hmm``,
    ``drugcompounds``, ``aminoacidcompounds``, ``centeroid``,
    ``residues_near_drug``, ``CONTACT_DIST`` and ``args``.

    :param pdb: PDB code; the entry is read from
        ``PDB_PATH/<chars 1-2 of the code>/pdb<code>.ent``
    """
    p = PDBParser(PERMISSIVE=1, QUIET=1)
    path_dir = PDB_PATH + "/" + pdb[1:3].lower() + "/"
    path = path_dir + "pdb" + pdb.lower() + ".ent"
    model = list(p.get_structure('X', path))[0]

    for chain_obj in list(model):
        chain = chain_obj.id
        # residue number -> domain label, for residues inside a domain
        hmm_residues = {}
        pdb_seq = list(chain_obj.get_residues())
        if pdb_seq:
            hmms = df_hmm[(df_hmm["pdb"] == pdb)
                          & (df_hmm["chain"] == chain)
                          & (df_hmm["start_res"] == str(pdb_seq[0].id[1]))]
            for _, hmm in hmms.iterrows():
                try:
                    # from3/to3 are 1-based; convert to list indices.
                    hmm_start = int(hmm["from3"]) - 1
                    hmm_end = int(hmm["to3"]) - 1
                    hmm_chain_name = "_".join(
                        map(str, [
                            hmm["accession"].split(".")[0], hmm["chain"],
                            pdb_seq[hmm_start].id[1], pdb_seq[hmm_end].id[1]
                        ]))
                    # hmm_end is the last residue of the domain (it is part
                    # of the label above), so the slice must include it.
                    hmm_residues.update({
                        res.id[1]: hmm_chain_name
                        for res in pdb_seq[hmm_start:hmm_end + 1]
                    })
                except IndexError:
                    print(pdb, hmm["accession"], hmm["chain"], hmm_start,
                          hmm_end, pdb_seq)

        aa_residues = []
        drug_molecules = []
        for res_obj in chain_obj.get_residues():
            if res_obj.resname in drugcompounds:
                drug_molecules.append(res_obj)
            elif res_obj.resname in aminoacidcompounds:
                aa_residues.append(res_obj)

        for res_drug_obj in drug_molecules:
            drug_centroid = centeroid(list(res_drug_obj))
            near_residues = residues_near_drug(drug_centroid, aa_residues)
            rows = []
            for drug_atom in res_drug_obj:
                for near_residue in near_residues:
                    hmm_name = hmm_residues.get(near_residue.id[1], "NoDn")
                    for residue_atom in near_residue:
                        distance = residue_atom - drug_atom
                        # Give up on this residue once an atom is far away.
                        if distance > 20:
                            break
                        if distance < CONTACT_DIST:
                            fields = [
                                pdb, chain, hmm_name, near_residue.id[1],
                                near_residue.resname,
                                residue_atom.serial_number,
                                res_drug_obj.id[1], res_drug_obj.resname,
                                drug_atom.serial_number, distance
                            ]
                            rows.append("\t".join(map(str, fields)) + "\n")
            # One append per ligand instead of reopening the file for every
            # contact; nothing is created when there are no rows.
            if rows:
                with open(args.distances, "a") as handle:
                    handle.writelines(rows)
def __init__(self, model, radius, offset=0, hse_up_key='HSE_U',
             hse_down_key='HSE_D', angle_key=None):
    """
    Compute half-sphere exposure (HSE) per residue and collect atom clashes.

    For each residue the neighbouring CA atoms within C{radius} are split
    into an "up" and a "down" half sphere, using the direction returned by
    C{self._get_cb}. The counts are stored in the residue's C{xtra}
    dictionary. Atoms of the residue lying closer than 2 (coordinate units)
    to an atom of an "up" neighbour that is not directly adjacent in the
    peptide are recorded in C{self.clash_pairs}.

    @param model: model
    @type model: L{Model}

    @param radius: HSE radius
    @type radius: float

    @param offset: number of flanking residues that are ignored in the
    calculation of the number of neighbors
    @type offset: int

    @param hse_up_key: key used to store HSEup in the entity.xtra attribute
    @type hse_up_key: string

    @param hse_down_key: key used to store HSEdown in the entity.xtra
    attribute
    @type hse_down_key: string

    @param angle_key: key used to store the angle between CA-CB and CA-pCB
    in the entity.xtra attribute
    @type angle_key: string
    """
    assert (offset >= 0)
    # For PyMOL visualization
    self.ca_cb_list = []
    ppb = CaPPBuilder()
    ppl = ppb.build_peptides(model)
    # NOTE(review): these three containers are filled but not used again in
    # the visible code; confirm whether a base-class call is expected.
    hse_map = {}
    hse_list = []
    hse_keys = []
    ### GP
    # Entries: [(CA b-factor, res number), (CA b-factor, res number)]
    self.clash_pairs = []
    for pp1 in ppl:
        for i in range(0, len(pp1)):
            if i == 0:
                r1 = None
            else:
                r1 = pp1[i - 1]
            r2 = pp1[i]
            if i == len(pp1) - 1:
                r3 = None
            else:
                r3 = pp1[i + 1]
            # This method is provided by the subclasses to calculate HSE
            result = self._get_cb(r1, r2, r3)
            if result is None:
                # Missing atoms, or i==0, or i==len(pp1)-1
                continue
            pcb, angle = result
            hse_u = 0
            hse_d = 0
            ca2 = r2['CA'].get_vector()
            residue_up = []  ### GP
            residue_down = []  ### GP
            for pp2 in ppl:
                for j in range(0, len(pp2)):
                    if pp1 is pp2 and abs(i - j) <= offset:
                        # neighboring residues in the chain are ignored
                        continue
                    ro = pp2[j]
                    if not is_aa(ro) or not ro.has_id('CA'):
                        continue
                    cao = ro['CA'].get_vector()
                    d = (cao - ca2)
                    if d.norm() < radius:
                        if d.angle(pcb) < (math.pi / 2):
                            hse_u += 1
                            ### GP
                            # Keep the residues found in the upper half sphere
                            residue_up.append(ro)
                            ### end of GP code
                        else:
                            hse_d += 1
                            ### GP
                            # Keep the residues found in the lower half sphere
                            residue_down.append(ro)
                            ### end of GP code
            res_id = r2.get_id()
            chain_id = r2.get_parent().get_id()
            # Fill the 3 data structures
            hse_map[(chain_id, res_id)] = (hse_u, hse_d, angle)
            ### GP residue_up and residue_down added to hse_list
            hse_list.append(
                (r2, (residue_up, residue_down, hse_u, hse_d, angle)))
            hse_keys.append((chain_id, res_id))
            # Add to xtra
            r2.xtra[hse_up_key] = hse_u
            r2.xtra[hse_down_key] = hse_d
            if angle_key:
                r2.xtra[angle_key] = angle

            ### GP checking for atom clashes
            # Directly adjacent residues (r1, r3) are bonded to r2 and are
            # therefore not clash candidates. The neighbours are tested
            # explicitly: indexing pp1[i - 1] / pp1[i + 1] inside a bare
            # "except: pass" silently skipped every check for the last
            # residue of a peptide and wrapped around to the last residue
            # for the first one.
            clash_candidates = [
                res for res in residue_up
                if (r1 is None or res != r1) and (r3 is None or res != r3)
            ]
            for atom in r2:
                ref_vector = atom.get_vector()
                for other_res in clash_candidates:
                    for other_atom in other_res:
                        d = other_atom.get_vector() - ref_vector
                        if d.norm() < 2:
                            self.clash_pairs.append([
                                (r2['CA'].get_bfactor(), r2.get_id()[1]),
                                (other_res['CA'].get_bfactor(),
                                 other_res.get_id()[1])
                            ])
def build_peptides(self, structure):
    """Return the polypeptides of ``structure``, non-standard residues included.

    The builder stored in ``self.ppb`` is tried first; when it yields
    nothing, a fresh ``CaPPBuilder`` is used as a fallback.
    """
    primary = self.ppb.build_peptides(structure, aa_only=False)
    if primary:
        return primary
    # case of failure
    return CaPPBuilder().build_peptides(structure, aa_only=False)