def seqres_atom_map(mmcif_dict, c):
    """Map the SEQRES positions of chain ``c`` to one-letter residue codes.

    Reads the ``_pdbx_poly_seq_scheme`` columns ``seq_id``,
    ``pdb_strand_id`` and ``pdb_mon_id`` from ``mmcif_dict``.

    Args:
        mmcif_dict: Mapping from mmCIF item names to equally long column
            lists (presumably the result of ``MMCIF2Dict`` -- confirm).
        c: Chain identifier compared against ``pdb_strand_id``.

    Returns:
        dict keyed by ``(seq_id, chain)``.  The value is ``"-"`` when
        ``pdb_mon_id`` is ``"?"``, otherwise ``tto(pdb_mon_id)``.

    Raises:
        KeyError: if one of the three columns is missing.  Whatever ``tto``
        raises for a residue name it does not know also propagates.

    ``tto`` is not defined here; it must be available at module level
    (sibling functions import ``three_to_one`` under that name).
    """
    category = "_pdbx_poly_seq_scheme"
    seq_ids = mmcif_dict[category + ".seq_id"]
    chains = mmcif_dict[category + ".pdb_strand_id"]
    residues = mmcif_dict[category + ".pdb_mon_id"]

    seqres = {}
    # pdb_seq_num used to be converted with int() for every row although the
    # value was never used; a non-numeric entry anywhere in the file aborted
    # the whole mapping, so the column is no longer read.
    for seqres_index, chain, res in zip(seq_ids, chains, residues):
        if chain != c:
            continue
        if res == "?":
            sres = "-"
        else:
            sres = tto("{}".format(res))
        seqres[(seqres_index, chain)] = sres
    return seqres
def seqres_atom_map(mmcif_dict, c, p):
    """Map SEQRES positions of chain ``c`` to residues, applying mutation ``p``.

    Args:
        mmcif_dict: Mapping from mmCIF item names to equally long column
            lists; the ``_pdbx_poly_seq_scheme`` columns ``seq_id``,
            ``pdb_seq_num``, ``pdb_strand_id`` and ``pdb_mon_id`` are read.
        c: Chain identifier compared against ``pdb_strand_id``.
        p: Either ``"NA"`` (no mutation) or a string whose first and last
            characters are residue letters and whose middle part is an
            integer position (e.g. ``"A123G"``).  The position is compared
            with ``pdb_seq_num``.

    Returns:
        ``(seqres, d)`` where ``seqres`` maps ``(seq_id, chain)`` to a
        one-letter code (``"-"`` for ``"?"`` residues, ``"X"`` when ``tto``
        does not know the residue, the last character of ``p`` at the
        mutated position) and ``d`` maps ``pdb_seq_num`` (int) to a 1-based
        running index over the residues of the chain that are not ``"?"``.

    Raises:
        ValueError: if the position inside ``p`` or a ``pdb_seq_num`` of
            chain ``c`` is not an integer.

    ``tto`` is not defined here; it must be available at module level.

    NOTE(review): the source layout was lost.  The mutation override is
    applied to every row of the chain, including ``"?"`` rows -- confirm
    that it was not meant to sit inside the non-"?" branch only.
    """
    if p != "NA":
        mut_pos = int(p[1:-1])
        mut_res = p[-1:]
    else:
        # The old sentinel (-1000) left the replacement residue undefined,
        # so a residue really numbered -1000 raised NameError.
        mut_pos = None
        mut_res = None

    category = "_pdbx_poly_seq_scheme"
    seq_ids = mmcif_dict[category + ".seq_id"]
    seq_nums = mmcif_dict[category + ".pdb_seq_num"]
    chains = mmcif_dict[category + ".pdb_strand_id"]
    residues = mmcif_dict[category + ".pdb_mon_id"]

    seqres = {}
    d = {}
    j = 1
    for seqres_index, seq_num, chain, res in zip(seq_ids, seq_nums, chains,
                                                 residues):
        if chain != c:
            continue
        # Converted only for the requested chain, so a non-numeric number in
        # some other chain cannot abort the mapping.
        pdb_seq_id = int(seq_num)
        if res == "?":
            sres = "-"
        else:
            d[pdb_seq_id] = j
            j += 1
            try:
                sres = tto("{}".format(res))
            except KeyError:
                # residue name without a one-letter code
                sres = "X"
        # MUTATION
        if mut_pos is not None and pdb_seq_id == mut_pos:
            sres = mut_res
        seqres[(seqres_index, chain)] = sres
    return (seqres, d)
# Script fragment: for every row of omim_sprot.csv, look up its UniProt id
# (column 2) in uniprot_pdb.csv.
# NOTE(review): the fragment appears to be truncated.  Neither k nor k1 is
# advanced in the visible code and h is never written or closed here, so the
# remainder of both loops presumably follows elsewhere -- confirm before use.
# `tto` must already be defined at module level.
f = open("omim_sprot.csv","r")
ft = f.readlines()
f.close()
g = open("uniprot_pdb.csv","r")
gt = g.readlines()
g.close()
h = open("disease_causing_mutations.txt","w")
k = 0
while k < len(ft):
    ft1 = ft[k].split(",")
    uid = ft1[2]
    # columns 3 and 5 are passed through tto (three-letter -> one-letter)
    res1 = ft1[3]
    res1 = tto("{}".format(res1))
    res2 = ft1[5]
    res2 = tto("{}".format(res2))
    pos = ft1[7]
    dis = ft1[8].strip("\n")
    print("{} of {}".format(k,len(ft)))
    # the search of uniprot_pdb.csv starts at its third line (index 2)
    k1 = 2
    gt1 = gt[k1].split(",")
    count = 0
    while gt1[0] != uid:
        # ran off the end of uniprot_pdb.csv: no PDB entry for this id
        if k1 >= len(gt):
            pdb = "NA"
            count = count + 1
            break
def main_func():
    """Write the backbone zone around residue ``sys.argv[2]`` for WT/mutant pairs.

    ``sd`` (from ``mut_dat``) is iterated as ``{id: [id, ...]}`` where every
    id is sliced as ``id[0:4]`` (PDB code) and ``id[5:6]`` (chain).  Going by
    the comments and messages below, the key is the wild type and the values
    are its mutants.  For every key the mmCIF file is unpacked to
    ``pdbprocess1.cif``; every value with the same ``HKW`` entry and a
    resolution below ``r`` is unpacked to ``pdbprocess2.cif`` and the two
    SEQRES maps are compared position by position.  When they agree, a line
    ``pdb chain pdb1 chain1 zone`` is written to the file named by
    ``sys.argv[3]``; ``zone`` is the comma separated list of residue numbers
    whose backbone centre (``COM`` of N, CA, C, O) lies within ``zdis`` of
    residue ``sys.argv[2]``.

    Relies on names defined outside this block: ``mut_dat``, ``pathfiles``,
    ``res_filter``, ``HETATM_KW``, ``MMCIF2Dict``, ``seqres_atom_map`` and
    ``COM``.

    NOTE(review): the original indentation was lost.  The nesting of the
    ``count2`` guard, of the ``else`` branch and of the progress print below
    is a best guess and must be confirmed against the original file.
    """
    # CRITERIA
    r = 2.5  # upper bound for the mutant's resolution value
    sd = mut_dat("{}/seq_sim.txt".format(pathfiles))
    resolution = res_filter()
    HKW = HETATM_KW()
    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto
    # ZONE VARIABLE (cut-off distance, compared as a squared distance below)
    zdis = 10.0
    # PATH FOR THE PDB/mmCIF FILES
    #pathmmcif = "/Users/tarun/Documents/mmCIF"
    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    # LOCAL RMSD OF THE POLYPEPTIDE PRESENT IN DICTIONARY sd
    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION
    z = open("{}".format(sys.argv[3]), "w")
    #temp = open("COM.txt","w")
    # count2 is set once for the whole run, so the zone is computed only for
    # the first matching pair (see NOTE(review) in the docstring).
    count2 = 0
    for x in sd.keys():
        pdbid = "{}".format(x)
        pdb = pdbid[0:4]
        cw = pdbid[5:6]
        typ = HKW["{}".format(pdb)]
        # CHECKING IF THE PDB's ARE ALIGNED
        # count1 is always 0: the `if` stands in for the disabled `try` below
        count1 = 0
        if count1 == 0:
            #try:
            # files live in <pathmmcif>/<2nd+3rd char of the code>/<code>.cif.gz
            fol = pdb[1:3]
            pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
            tar = gzip.open("{}".format(pdbfile), "rb")
            out = open("pdbprocess1.cif", "wb")
            out.write(tar.read())
            tar.close()
            out.close()
            mmcif = MMCIF2Dict("pdbprocess1.cif")
            # NOTE(review): called with one argument here; make sure this
            # matches the seqres_atom_map variant that is actually in scope.
            idmap1 = seqres_atom_map(mmcif)
            kk = 0
            while kk < len(sd["{}".format(x)]):
                pdbid1 = "{}".format(sd["{}".format(x)][kk])
                #print(pdbid1)
                pdb1 = pdbid1[0:4]
                cm = pdbid1[5:6]
                typ1 = HKW["{}".format(pdb1)]
                rescr = resolution[0]["{}".format(pdb1)]
                # entries without a resolution are pushed above the cut-off
                if rescr == "None":
                    rescr = 1000.0
                if typ1 == typ and float(rescr) < r:
                    fol = pdb1[1:3]
                    pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)
                    tar = gzip.open("{}".format(pdbfile), "rb")
                    out = open("pdbprocess2.cif", "wb")
                    out.write(tar.read())
                    tar.close()
                    out.close()
                    mmcif = MMCIF2Dict("pdbprocess2.cif")
                    idmap2 = seqres_atom_map(mmcif)
                    # count = number of SEQRES positions present in both
                    # chains whose residues differ
                    count = 0
                    for i in idmap1.keys():
                        if i[1] == cw:
                            for m in idmap2.keys():
                                if m[1] == cm and i[0] == m[0]:
                                    if idmap2[m] != idmap1[i]:
                                        count = count + 1
                    if count == 0:
                        if count2 == 0:
                            count2 = count2 + 1
                            #print("*** {} AND {} :: {} of {} ***" .format(pdbid,pdbid1,kk,len(sd["{}".format(x)])))
                            # FOR WILDTYPE
                            # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE
                            structure_id = "{}".format(pdb)
                            filename = "pdbprocess1.cif"
                            structure = parser.get_structure(
                                structure_id, filename)
                            model = structure[0]
                            chain = model["{}".format(cw)]
                            c1 = chain.get_list()  # LIST ALL THE RESIDUES
                            k1 = 0
                            resid_list = []
                            resname_list = []
                            com_list = []
                            while k1 < len(c1):
                                c2 = c1[k1].get_id()
                                resid = c2[1]
                                # only ids whose first field is blank are
                                # used (hetero/water entries are skipped)
                                if c2[0] == " ":
                                    residue = chain[c2]
                                    tresname = residue.get_resname()
                                    try:
                                        resname = tto("{}".format(tresname))
                                    except:
                                        resname = "X"
                                    r1 = residue.get_list()  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE
                                    k2 = 0
                                    res = []  # ATOM NAMES
                                    cd = []  # ATOM COORDINATES
                                    while k2 < len(r1):
                                        r2 = r1[k2].get_id()
                                        # COM OF THE BACKBONE
                                        if r2 == "CA" or r2 == "N" or r2 == "C" or r2 == "O":
                                            res.append(r2)
                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            cd.append(a1)
                                        k2 = k2 + 1
                                    CM = COM(res, cd)
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    com_list.append(CM)
                                k1 = k1 + 1
                            # locate the residue numbered sys.argv[2] and
                            # collect every other residue within zdis of it
                            k1 = 0
                            while k1 < len(resid_list):
                                if resid_list[k1] == int(sys.argv[2]):
                                    v1 = com_list[k1]
                                    zres = "{}".format(resid_list[k1])
                                    zresname = "{}".format(resname_list[k1])
                                    k2 = 0
                                    while k2 < len(resid_list):
                                        if k2 != k1:
                                            v2 = com_list[k2]
                                            dis = (v1[0] - v2[0])**2 + (
                                                v1[1] - v2[1])**2 + (v1[2] - v2[2])**2
                                            if dis < (zdis * zdis):
                                                zres = zres + ",{}".format(
                                                    resid_list[k2])
                                                zresname = zresname + "{}".format(
                                                    resname_list[k2])
                                        k2 = k2 + 1
                                    # forces the outer loop to stop after the hit
                                    k1 = len(resid_list)
                                k1 = k1 + 1
                        # zres is unbound here if the residue was never found
                        z.write("{} {} {} {} {}\n".format(
                            pdb, cw, pdb1, cm, zres))
                    else:
                        print("WT AND MUT MISMATCH")
                print("{} of {} ;; {} {}".format(kk, len(sd["{}".format(x)]),
                                                 pdbid, pdbid1))
                kk = kk + 1
            #except:
            #print("FILE NOT FOUND")
    z.close()
def salt_bridge():
    """Report salt-bridge partners of the mutated residue in WT and mutant.

    Input is ``mut_data.txt``; each line is split on whitespace and read as
    ``id0 id1 position residue3 residue4`` (``id[0:4]`` is the PDB code,
    ``id[5:6]`` the chain).  Lines ``sys.argv[1]`` .. ``sys.argv[2]`` - 1 are
    processed (``END``/``end`` means up to the last line) and two result
    lines per input line are written to the file named by ``sys.argv[3]``:
    one tagged ``WT`` (structure ``id0``) and one tagged ``MUT`` (``id1``).

    A residue is represented by one atom (D: CG, E: CD, K: NZ, R: NE) or by
    two atoms for H (ND1, NE2).  A partner is an oppositely charged residue
    with such an atom closer than ``zdis`` to an atom of the residue at
    ``position``.  Any exception while handling a structure is reported as
    "FILE NOT FOUND" and yields a ``NO`` line.

    NOTE(review): the original indentation was lost; the nesting below is a
    reconstruction and should be checked against the original file.
    """
    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto
    import numpy as np
    # ALL: every residue considered; ALLN / ALLP: the two partner classes
    ALL = dict()
    ALL["D"] = "SB"
    ALL["E"] = "SB"
    ALL["K"] = "SB"
    ALL["R"] = "SB"
    ALL["H"] = "SB"
    ALLN = dict()
    ALLN["D"] = "SB"
    ALLN["E"] = "SB"
    ALLP = dict()
    ALLP["K"] = "SB"
    ALLP["R"] = "SB"
    ALLP["H"] = "SB"
    # neg[i] is represented by atom negat[i]; posi[i] by posat[i]
    neg = ["D", "E"]
    negat = ["CG", "CD"]
    posi = ["K", "R", "H"]
    posat = ["NZ", "NE", ["ND1", "NE2"]]
    # ZONE VARIABLE (cut-off distance, compared as a squared distance below)
    zdis = 4.0
    # PATH FOR THE PDB/mmCIF FILES
    #pathmmcif = "/bmm/data/pdbmmcif/data/structures/all/mmCIF"
    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    #pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"
    #pathPDB = "/bmm/home/tkhanna1/Documents/Database/First_10000/test_set"
    dis = open("mut_data.txt", "r")
    ht = dis.readlines()
    dis.close()
    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION
    z = open("{}".format(sys.argv[3]), "w")
    #temp = open("COM.txt","w")
    start = sys.argv[1]
    end = sys.argv[2]
    if end == "END" or end == "end":
        end = len(ht)
    end = int(end)
    k = int(start)
    while k < end:  # end = len(ht)
        mutant = []
        mu = ht[k].split()
        pos = mu[2].strip("\n")
        check1 = mu[3].strip("\n")
        check2 = mu[4].strip("\n")
        # kk == 0 handles mu[0] (tagged WT), kk == 1 handles mu[1] (tagged MUT)
        kk = 0
        while kk < 2:
            if check1 in ALL.keys() or check2 in ALL.keys():
                pdbid = mu[(kk + 0)].strip("\n")
                pdb = pdbid[0:4]  # PDB NAME
                C = pdbid[5:6]  # CHAIN
                print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))
                # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE
                #cc = 0
                #if cc == 0:
                try:
                    fol = pdb[1:3]
                    pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
                    tar = gzip.open("{}".format(pdbfile), "rb")
                    # the scratch file name carries `start`, presumably so
                    # that parallel runs do not collide -- confirm
                    out = open("pdbprocess{}.cif".format(start), "wb")
                    out.write(tar.read())
                    tar.close()
                    out.close()
                    structure_id = "{}".format(pdb)
                    filename = "pdbprocess{}.cif".format(start)
                    structure = parser.get_structure(structure_id, filename)
                    model = structure[0]
                    chain = model["{}".format(C)]
                    c1 = chain.get_list()  # LIST ALL THE RESIDUES
                    k1 = 0
                    # parallel lists holding only D/E/K/R/H residues for
                    # which the representative atom(s) were found
                    resid_list = []
                    resname_list = []
                    com_list = []
                    while k1 < len(c1):
                        c2 = c1[k1].get_id()
                        resid = c2[1]
                        if c2[0] == " ":
                            residue = chain[c2]
                            tresname = residue.get_resname()
                            try:
                                resname = tto("{}".format(tresname))
                            except:
                                resname = "X"
                            # count != 0 -> residue is in neg, ksel = its index
                            count = 0
                            kn = 0
                            while kn < len(neg):
                                if resname == neg[kn]:
                                    count = count + 1
                                    ksel = kn
                                kn = kn + 1
                            # NOTE(review): count1 is only reset when the
                            # residue is NOT in neg, so for a D/E residue it
                            # keeps the previous residue's value and is
                            # unbound if the first residue handled is D/E.
                            if count == 0:
                                count1 = 0
                                kn = 0
                                while kn < len(posi):
                                    if resname == posi[kn]:
                                        count1 = count1 + 1
                                        ksel = kn
                                    kn = kn + 1
                            if count != 0:
                                r1 = residue.get_list()  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE
                                k2 = 0
                                res = []  # ATOM NAMES
                                cd = []  # ATOM COORDINATES
                                countc = 0
                                while k2 < len(r1):
                                    r2 = r1[k2].get_id()
                                    if r2 == negat[ksel]:
                                        res.append(r2)
                                        atom = residue['{}'.format(r2)]
                                        a1 = atom.get_coord()
                                        list1 = [a1[0], a1[1], a1[2]]
                                        cd.append(list1)
                                        countc = countc + 1
                                    k2 = k2 + 1
                                if countc == 1:
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    list1 = [cd[0][0], cd[0][1], cd[0][2]]
                                    com_list.append(list1)
                            if count1 != 0:
                                r1 = residue.get_list()  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE
                                k2 = 0
                                res = []  # ATOM NAMES
                                cd = []  # ATOM COORDINATES
                                countc = 0
                                while k2 < len(r1):
                                    r2 = r1[k2].get_id()
                                    # ksel == 2 is H, which has two atoms
                                    if ksel != 2:
                                        if r2 == posat[ksel]:
                                            res.append(r2)
                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            list1 = [a1[0], a1[1], a1[2]]
                                            cd.append(list1)
                                            countc = countc + 1
                                    else:
                                        if r2 == posat[ksel][0] or r2 == posat[ksel][1]:
                                            res.append(r2)
                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            list1 = [a1[0], a1[1], a1[2]]
                                            cd.append(list1)
                                            countc = countc + 1
                                    k2 = k2 + 1
                                # one atom -> [x, y, z]
                                if countc == 1:
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    list1 = [cd[0][0], cd[0][1], cd[0][2]]
                                    com_list.append(list1)
                                # two atoms (H) -> [x1, y1, z1, x2, y2, z2]
                                if countc == 2:
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    list1 = [
                                        cd[0][0], cd[0][1], cd[0][2], cd[1][0],
                                        cd[1][1], cd[1][2]
                                    ]
                                    com_list.append(list1)
                        k1 = k1 + 1
                    # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE
                    # after this loop count != 0 only if `pos` was found;
                    # rcpos is then its index in the parallel lists
                    k1 = 0
                    count = 0
                    while k1 < len(resid_list):
                        posc = resid_list[k1]
                        count = 0
                        if int(posc) == int(pos):
                            count = count + 1
                            rcpos = k1
                            k1 = len(resid_list)
                        k1 = k1 + 1
                    if count != 0:
                        hd = -1
                        hd1 = -1
                        zresname = "{}".format(resname_list[rcpos])
                        if zresname == "H":
                            v1 = com_list[rcpos]
                            # COMPUTING THE DISTANCES
                            # x selects the first or second atom of the H
                            # residue via the offset `skip` into v1.
                            # NOTE(review): v1 has only three entries when a
                            # single H atom was found, so x == 1 then raises
                            # IndexError (caught by the bare except below).
                            list1 = []
                            for x in range(0, 2):
                                skip = (x * 3)
                                k3 = 0
                                while k3 < len(com_list):
                                    if k3 != rcpos:
                                        v2 = com_list[k3]
                                        zresnamec = "{}".format(
                                            resname_list[k3])
                                        if zresnamec in ALLN.keys():
                                            # ALLN holds only D and E, so the
                                            # "H" branch cannot be taken here
                                            if zresnamec == "H" and int(
                                                    hd) != int(resid_list[k3]):
                                                for y in range(0, 2):
                                                    skip2 = y * 3
                                                    dis = (
                                                        v1[0 + skip] -
                                                        v2[0 + skip2])**2 + (
                                                            v1[1 + skip] -
                                                            v2[1 + skip2]
                                                        )**2 + (
                                                            v1[2 + skip] -
                                                            v2[2 + skip2])**2
                                                    if dis < (zdis * zdis):
                                                        list1.append(
                                                            resid_list[k3])
                                                        hd = resid_list[k3]
                                                        y = 3
                                            # hd1 is set to 3 after the first
                                            # hit, so at most one partner is
                                            # recorded in this branch
                                            elif zresnamec != "H" and hd1 != 3:
                                                dis = (v1[0 + skip] - v2[0])**2 + (
                                                    v1[1 + skip] - v2[1]
                                                )**2 + (v1[2 + skip] - v2[2])**2
                                                if dis < (zdis * zdis):
                                                    list1.append(
                                                        resid_list[k3])
                                                    hd1 = 3
                                    k3 = k3 + 1
                        else:
                            v1 = com_list[rcpos]
                            if zresname in ALLP.keys():
                                k3 = 0
                                list1 = []
                                while k3 < len(com_list):
                                    if k3 != rcpos:
                                        v2 = com_list[k3]
                                        zresnamec = "{}".format(
                                            resname_list[k3])
                                        if zresnamec in ALLN.keys():
                                            if zresnamec == "H" and int(
                                                    hd) != int(resid_list[k3]):
                                                for x in range(0, 2):
                                                    skip = (x * 3)
                                                    dis = (
                                                        v1[0] - v2[0 + skip]
                                                    )**2 + (
                                                        v1[1] - v2[1 + skip]
                                                    )**2 + (v1[2] - v2[2 + skip])**2
                                                    if dis < (zdis * zdis):
                                                        list1.append(
                                                            resid_list[k3])
                                                        hd = resid_list[k3]
                                                        x = 2
                                            elif zresnamec != "H":
                                                dis = (v1[0] - v2[0])**2 + (
                                                    v1[1] - v2[1])**2 + (
                                                        v1[2] - v2[2])**2
                                                if dis < (zdis * zdis):
                                                    list1.append(
                                                        resid_list[k3])
                                    k3 = k3 + 1
                            elif zresname in ALLN.keys():
                                k3 = 0
                                list1 = []
                                while k3 < len(com_list):
                                    if k3 != rcpos:
                                        v2 = com_list[k3]
                                        zresnamec = "{}".format(
                                            resname_list[k3])
                                        if zresnamec in ALLP.keys():
                                            # both atoms of an H partner are
                                            # tested.  Assigning to x does not
                                            # leave a Python for loop, so the
                                            # same H can be appended twice.
                                            if zresnamec == "H" and int(
                                                    hd) != int(resid_list[k3]):
                                                for x in range(0, 2):
                                                    skip = (x * 3)
                                                    dis = (
                                                        v1[0] - v2[0 + skip]
                                                    )**2 + (
                                                        v1[1] - v2[1 + skip]
                                                    )**2 + (v1[2] - v2[2 + skip])**2
                                                    if dis < (zdis * zdis):
                                                        list1.append(
                                                            resid_list[k3])
                                                        hd = resid_list[k3]
                                                        x = 2
                                            elif zresnamec != "H":
                                                dis = (v1[0] - v2[0])**2 + (
                                                    v1[1] - v2[1])**2 + (
                                                        v1[2] - v2[2])**2
                                                if dis < (zdis * zdis):
                                                    list1.append(
                                                        resid_list[k3])
                                    k3 = k3 + 1
                        # list1 = residue numbers of the partners found
                        if len(list1) > 0:
                            if kk == 0:
                                z.write("{} {} {} {} {} YES WT {} {}\n".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n"), list1, len(list1)))
                                print("{} {} {} {} {} YES {} WT".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n"), list1))
                            else:
                                z.write(
                                    "{} {} {} {} {} YES MUT {} {}\n".format(
                                        mu[0], mu[1], mu[2], mu[3],
                                        mu[4].strip("\n"), list1, len(list1)))
                                print("{} {} {} {} {} YES {} MUT".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n"), list1))
                        else:
                            if kk == 0:
                                z.write("{} {} {} {} {} NO WT\n".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n")))
                            else:
                                z.write("{} {} {} {} {} NO MUT\n".format(
                                    mu[0], mu[1], mu[2], mu[3],
                                    mu[4].strip("\n")))
                    else:
                        # the residue at `pos` is not in the charged lists
                        if kk == 0:
                            z.write("{} {} {} {} {} NO WT\n".format(
                                mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                        else:
                            z.write("{} {} {} {} {} NO MUT\n".format(
                                mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                except:
                    # bare except: every failure, not only a missing file,
                    # ends up here and is reported as "FILE NOT FOUND"
                    print("FILE NOT FOUND")
                    if kk == 0:
                        z.write("{} {} {} {} {} NO WT\n".format(
                            mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                    else:
                        z.write("{} {} {} {} {} NO MUT\n".format(
                            mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                kk = kk + 1
            else:
                # neither residue letter is D/E/K/R/H: write both NO lines
                # and leave the kk loop
                kk = 2
                #print("NOT SALT BRIDGE RESIDUES")
                z.write("{} {} {} {} {} NO WT\n".format(
                    mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                z.write("{} {} {} {} {} NO MUT\n".format(
                    mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
        k = k + 1
    z.close()
def main_func():
    """Write CATH/PFAM counts and sampled backbone zones for WT/mutant pairs.

    Part 1 keeps the entries of ``md`` (from ``mut_dat``) whose ``ca`` value
    formats to ``"1"`` in ``sd`` and writes group sizes to ``CATH_dis.txt``;
    the entries of ``sd`` are then grouped by ``pf[x][i][2]`` and the group
    sizes written to ``PFAM_dis.txt``.

    Part 2 walks over ``sd`` (entries numbered from 0, handled from
    ``sys.argv[1]`` on; ``sys.argv[2]`` is the end, ``END``/``end`` = all).
    For every key and each of its values both mmCIF files are unpacked, the
    SEQRES maps compared, and, if no shared position differs, zones around
    sampled residues of the key's chain are written to the file named by
    ``sys.argv[3]`` as ``pdb chain pdb1 chain1 zone``.

    Relies on names defined outside this block: ``PFAM``, ``cath``,
    ``mut_dat``, ``MMCIF2Dict``, ``seqres_atom_map`` and ``COM``.

    NOTE(review): the original indentation was lost; in particular the
    position of ``z.write`` relative to the sampling loop is a best guess.
    """
    pf = PFAM("pdb_pfam_mapping.txt")
    ca = cath("cath_domain.txt")
    md = mut_dat("set1_sc.txt")
    f = open("PFAM_dis.txt", "w")
    g = open("CATH_dis.txt", "w")
    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto
    # ZONE VARIABLE (cut-off distance, compared as a squared distance below)
    zdis = 10.0
    # PATH FOR THE PDB/mmCIF FILES
    #pathmmcif = "/Users/tarun/Documents/mmCIF"
    #pathmmcif = "/Volumes/BIOINFO/mmCIF"
    pathmmcif = "/data/pdb/divided/mmCIF"
    # d1: "1" -> ids whose ca value is "1"; sd: the same ids with their md value
    d1 = dict()
    sd = dict()
    nf = 0
    for x in md:
        try:
            t1 = "{}".format(ca["{}".format(x)])
            if t1 not in d1.keys() and t1 == "1":
                d1["{}".format(t1)] = [x]
                if x not in sd.keys():
                    sd["{}".format(x)] = md["{}".format(x)]
            elif t1 == "1":
                d1["{}".format(t1)].append(x)
                if x not in sd.keys():
                    sd["{}".format(x)] = md["{}".format(x)]
        except:
            # counted as "not found in CATH"; any other error lands here too
            nf = nf + 1
            #print("{} NOT FOUND IN CATH".format(x))
    #print("{} OUT OF {} ARE NOT FOUND IN CATH FILE".format(nf,len(md)))
    k = 1
    for x in d1:
        t1 = len(d1["{}".format(x)])
        g.write("{} {} {}\n".format(k, x, t1))
        k = k + 1
    # d: third field of each pf record -> ids carrying it
    d = dict()
    nf = 0
    for x in sd:
        try:
            k1 = 0
            while k1 < len(pf["{}".format(x)]):
                #print(pf["{}".format(x))
                t1 = pf["{}".format(x)][k1][2]
                if t1 not in d.keys():
                    d["{}".format(t1)] = [x]
                else:
                    d["{}".format(t1)].append(x)
                k1 = k1 + 1
        except:
            nf = nf + 1
            #print("{} NOT FOUND IN PFAM".format(x))
    #print("{} OUT OF {} ARE NOT FOUND IN PFAM FILE".format(nf,len(md)))
    k = 1
    for x in d:
        t1 = len(d["{}".format(x)])
        f.write("{} {} {}\n".format(k, x, t1))
        k = k + 1
    f.close()
    g.close()
    # LOCAL RMSD OF THE POLYPEPTIDE PRESENT IN DICTIONARY sd
    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION
    z = open("{}".format(sys.argv[3]), "w")
    #temp = open("COM.txt","w")
    start = sys.argv[1]
    end = sys.argv[2]
    if end == "END" or end == "end":
        end = len(sd)
    end = int(end)
    start = int(start)
    k = 0
    for x in sd.keys():
        print("# {} of {} #".format(k, end))
        if k >= start:
            pdbid = "{}".format(x)
            pdb = pdbid[0:4]
            cw = pdbid[5:6]
            # CHECKING IF THE PDB's ARE ALIGNED
            #count1 = 0
            #if count1 == 0:
            try:
                fol = pdb[1:3]
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
                tar = gzip.open("{}".format(pdbfile), "rb")
                out = open("pdbprocess1{}.cif".format(start), "wb")
                out.write(tar.read())
                tar.close()
                out.close()
                mmcif = MMCIF2Dict("pdbprocess1{}.cif".format(start))
                idmap1 = seqres_atom_map(mmcif)
                kk = 0
                while kk < len(sd["{}".format(x)]):
                    pdbid1 = "{}".format(sd["{}".format(x)][kk])
                    #print(pdbid1)
                    pdb1 = pdbid1[0:4]
                    cm = pdbid1[5:6]
                    fol = pdb1[1:3]
                    pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)
                    tar = gzip.open("{}".format(pdbfile), "rb")
                    out = open("pdbprocess2{}.cif".format(start), "wb")
                    out.write(tar.read())
                    tar.close()
                    out.close()
                    mmcif = MMCIF2Dict("pdbprocess2{}.cif".format(start))
                    idmap2 = seqres_atom_map(mmcif)
                    # count = number of SEQRES positions present in both
                    # chains whose residues differ
                    count = 0
                    for i in idmap1.keys():
                        if i[1] == cw:
                            for m in idmap2.keys():
                                if m[1] == cm and i[0] == m[0]:
                                    if idmap2[m] != idmap1[i]:
                                        count = count + 1
                    # count2 is reset for every pair, so the inner test
                    # below is always true
                    count2 = 0
                    if count == 0:
                        if count2 == 0:
                            count2 = count2 + 1
                            #print("*** {} AND {} :: {} of {} ***" .format(pdbid,pdbid1,kk,len(sd["{}".format(x)])))
                            # FOR WILDTYPE
                            # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE
                            structure_id = "{}".format(pdb)
                            filename = "pdbprocess1{}.cif".format(start)
                            structure = parser.get_structure(
                                structure_id, filename)
                            model = structure[0]
                            chain = model["{}".format(cw)]
                            c1 = chain.get_list()  # LIST ALL THE RESIDUES
                            k1 = 0
                            resid_list = []
                            resname_list = []
                            com_list = []
                            while k1 < len(c1):
                                c2 = c1[k1].get_id()
                                resid = c2[1]
                                if c2[0] == " ":
                                    residue = chain[c2]
                                    tresname = residue.get_resname()
                                    try:
                                        resname = tto("{}".format(tresname))
                                    except:
                                        resname = "X"
                                    r1 = residue.get_list()  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE
                                    k2 = 0
                                    res = []  # ATOM NAMES
                                    cd = []  # ATOM COORDINATES
                                    while k2 < len(r1):
                                        r2 = r1[k2].get_id()
                                        # COM OF THE BACKBONE
                                        if r2 == "CA" or r2 == "N" or r2 == "C" or r2 == "O":
                                            res.append(r2)
                                            atom = residue['{}'.format(r2)]
                                            a1 = atom.get_coord()
                                            cd.append(a1)
                                        k2 = k2 + 1
                                    CM = COM(res, cd)
                                    resid_list.append(resid)
                                    resname_list.append(resname)
                                    com_list.append(CM)
                                k1 = k1 + 1
                            # sample list positions 20, 20+ss, ... and stay
                            # 20 entries away from both ends; ss is a fifth
                            # of the remaining span, but at least 6
                            k1 = 20
                            ss = int((len(resid_list) - 40) / 5)
                            if ss < 6:
                                ss = 6
                            while k1 < (len(resid_list) - 20):
                                v1 = com_list[k1]
                                zres = "{}".format(resid_list[k1])
                                zresname = "{}".format(resname_list[k1])
                                k2 = 0
                                while k2 < len(resid_list):
                                    if k2 != k1:
                                        v2 = com_list[k2]
                                        dis = (v1[0] - v2[0])**2 + (
                                            v1[1] - v2[1])**2 + (v1[2] - v2[2])**2
                                        if dis < (zdis * zdis):
                                            zres = zres + ",{}".format(
                                                resid_list[k2])
                                            zresname = zresname + "{}".format(
                                                resname_list[k2])
                                    k2 = k2 + 1
                                k1 = k1 + ss
                                # NOTE(review): assumed to run once per
                                # sampled residue -- confirm it was inside
                                # this loop in the original file
                                z.write("{} {} {} {} {}\n".format(
                                    pdb, cw, pdb1, cm, zres))
                            # (The original file carries here a disabled,
                            # commented-out copy of the block above that
                            # processed the mutant chain `cm` from
                            # pdbprocess2.cif and wrote id, zone and residue
                            # names on separate lines.)
                    else:
                        print("WT AND MUT MISMATCH")
                    kk = kk + 1
            except:
                # bare except: every failure inside the try, not only a
                # missing file, is reported as "FILE NOT FOUND"
                print("FILE NOT FOUND")
                # (disabled: writing the id followed by two "NA" lines)
        k = k + 1
        # the test uses >, so the entry numbered `end` is still processed
        if k > end:
            break
    z.close()
def func2(arg1):
    """Summarise a structural alignment and optionally compute local RMSDs.

    All file names are fixed and relative to the working directory:

    1. ``output_all`` and ``output_all_atm`` are filtered down to their
       ATOM, TER and REMARK records and written to ``file1.pdb`` and
       ``file2.pdb``.
    2. ``temp`` is scanned for the first line starting with ``Length`` (its
       4th token and that of the following line become the two lengths) and
       for the next line starting with ``Aligned`` (coverage = 3rd token
       before the first comma, global RMSD = 2nd token after it).
       Presumably this is the text report of the alignment program --
       confirm.
    3. ``results`` receives ``grmsd lenwt lenmut coverage``.
    4. If ``arg1 == 1`` the residues listed in ``sys.argv[2]`` (comma
       separated, the first field is skipped) are collected from chain A
       (wild type) and chain B (mutant) of ``file2.pdb`` and a second line
       ``lrmsd1 lrmsd2`` is appended: the CA RMSD from ``RMSD`` and the
       side-chain RMSD from ``RMSD_SC``, or ``NA`` when the atom counts of
       the two chains differ.

    Args:
        arg1: ``1`` to compute the local RMSDs as well.

    Raises:
        IndexError: if ``temp`` lacks the ``Length``/``Aligned`` lines.
        Whatever ``tto`` raises for a chain-A residue name it does not know
        also propagates.

    ``RMSD`` and ``RMSD_SC`` are defined outside this block.
    """
    import sys
    from Bio.PDB.PDBParser import PDBParser
    from Bio.PDB.Polypeptide import three_to_one as tto

    AA = [
        "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F",
        "P", "S", "T", "W", "Y", "V"
    ]
    # side-chain atom names per residue, in the order of AA
    AT = [["CB"], ["CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
          ["CB", "CG", "OD1", "ND2"], ["CB", "CG", "OD1", "OD2"],
          ["CB", "SG"], ["CB", "CG", "CD", "OE1", "OE2"],
          ["CB", "CG", "CD", "OE1", "NE2"], [],
          ["CB", "CG", "ND1", "CE1", "NE2", "CD2"],
          ["CB", "CG1", "CG2", "CD1"], ["CB", "CG", "CD1", "CD2"],
          ["CB", "CG", "CD", "CE", "NZ"], ["CB", "CG", "SD", "CE"],
          ["CB", "CG", "CD1", "CE1", "CZ", "CE2", "CD2"],
          ["CB", "CG", "CD"], ["CB", "OG"], ["CB", "CG2", "OG1"],
          [
              "CB", "CG", "CD1", "NE1", "CE2", "CD2", "CE3", "CZ3", "CH2",
              "CZ2"
          ], ["CB", "CG", "CD1", "CE1", "CZ", "OH", "CE2", "CD2"],
          ["CB", "CG1", "CG2"]]
    # atom pairs handed to RMSD_SC as `drot` / `drot1`
    # (presumably interchangeable atom names -- confirm against RMSD_SC)
    IN = [[], ["NH1", "NH2"], [], ["OD1", "OD2"], [], ["OE1", "OE2"], [], [],
          [], [], ["CD1", "CD2"], [], [], ["CD1", "CD2"], [], [], [], [],
          ["CD1", "CD2"], ["CG1", "CG2"]]
    IN1 = [[], [], [], [], [], [], [], [], [], [], [], [], [],
           ["CE1", "CE2"], [], [], [], [], ["CE1", "CE2"], []]
    dat = dict(zip(AA, AT))
    drot = dict(zip(AA, IN))
    drot1 = dict(zip(AA, IN1))

    parser = PDBParser(QUIET=True)
    backbone = ("CA", "N", "C", "O")

    def _copy_records(src, dst):
        """Copy the ATOM/TER/REMARK lines of ``src`` to ``dst``."""
        with open(src, "r") as fin, open(dst, "w") as fout:
            for line in fin:
                fields = line.split()
                # blank lines used to raise IndexError on fields[0]
                if fields and fields[0] in ("ATOM", "TER", "REMARK"):
                    fout.write("{}".format(line))

    def _first_token(line):
        """Return the first whitespace-separated token, "NA" if there is none."""
        fields = line.split()
        return fields[0] if fields else "NA"

    # CONVERTING RASMOL FILES INTO PDB FORMAT
    _copy_records("output_all", "file1.pdb")  # ONLY C-ALPHA
    _copy_records("output_all_atm", "file2.pdb")  # ALL ATOMS

    # READING THE GLOBAL RMSD VALUE AND THE COVERAGE VALUE
    with open("temp", "r") as f:
        ft = f.readlines()
    k = 0
    while _first_token(ft[k]) != "Length":
        k += 1
    lenwt = ft[k].split()[3]
    k += 1
    lenmut = ft[k].split()[3]
    while _first_token(ft[k]) != "Aligned":
        k += 1
    aligned = ft[k].split(",")
    grmsd = aligned[1].split()[1]
    coverage = aligned[0].split()[2]

    with open("results", "w") as g:
        g.write("{} {} {} {}\n".format(grmsd, lenwt, lenmut, coverage))
        if arg1 == 1:
            # residue numbers of the zone; the first field is skipped
            wt = {int(tok) for tok in sys.argv[2].split(",")[1:]}

            def _collect(chain):
                """Gather zone residues, CA coords and side-chain atoms."""
                resids, residues, ca, sc_names, sc_coords = [], [], [], [], []
                for entry in chain.get_list():
                    rid = entry.get_id()
                    if rid[1] not in wt:
                        continue
                    residue = chain[rid]
                    resids.append(rid[1])
                    residues.append(residue)
                    for atom_entry in residue.get_list():
                        name = atom_entry.get_id()
                        if name == "CA":
                            ca.append(residue[name].get_coord())
                        # ONLY SIDE CHAIN: no backbone, no hydrogens
                        if name not in backbone and name[0:1] != "H":
                            sc_names.append(name)
                            sc_coords.append(residue[name].get_coord())
                return resids, residues, ca, sc_names, sc_coords

            # LOCAL RMSD BASED ON C-ALPHA CARBONS AND SIDE CHAINS
            structure = parser.get_structure("file2", "file2.pdb")
            model = structure[0]
            # FOR WILD TYPE
            reswt1, wt_residues, cdwt1, atwt2, cdwt2 = _collect(model["A"])
            resnamewt1 = [
                tto("{}".format(residue.get_resname()))
                for residue in wt_residues
            ]
            # FOR MUTANT
            _, _, cdmut1, atmut2, cdmut2 = _collect(model["B"])

            # CALCULATING LOCAL RMSD FOR C-ALPHA AND SIDE CHAINS
            if len(cdwt1) == len(cdmut1):
                lrmsd1 = RMSD(cdwt1, cdmut1, len(reswt1))
            else:
                lrmsd1 = "NA"
            if len(cdwt2) == len(cdmut2):
                lrmsd2 = RMSD_SC(cdwt2, cdmut2, len(reswt1), atwt2, atmut2,
                                 resnamewt1, reswt1, dat, drot, drot1)
            else:
                lrmsd2 = "NA"
            g.write("{} {}".format(lrmsd1, lrmsd2))
def disulphide_bond():
    """Report disulphide partners of a mutated cysteine.

    Input is ``mut_data.txt``; each line is split on whitespace and read as
    ``id0 id1 position residue3 residue4`` (``id[0:4]`` is the PDB code,
    ``id[5:6]`` the chain).  Lines ``sys.argv[1]`` .. ``sys.argv[2]`` - 1 are
    processed (``END``/``end`` means up to the last line).  If ``residue3``
    is ``C`` structure ``id0`` is inspected, else if ``residue4`` is ``C``
    structure ``id1``; otherwise a ``NO`` line is written.  A cysteine counts
    as a partner when its SG lies within ``zdis`` of the SG at ``position``
    and ``dihedral`` of the four CB/SG points lies between 75 and 105.  One
    line per input line goes to the file named by ``sys.argv[3]``.

    ``dihedral`` is defined outside this block.

    NOTE(review): the original indentation was lost; the nesting below is a
    reconstruction and should be checked against the original file.
    """
    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto
    import numpy as np
    # ZONE VARIABLE (SG-SG cut-off, compared as a squared distance below)
    zdis = 2.2
    # PATH FOR THE PDB/mmCIF FILES
    #pathmmcif = "/bmm/data/pdbmmcif/data/structures/all/mmCIF"
    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    #pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"
    #pathPDB = "/bmm/home/tkhanna1/Documents/Database/First_10000/test_set"
    dis = open("mut_data.txt", "r")
    ht = dis.readlines()
    dis.close()
    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION
    z = open("{}".format(sys.argv[3]), "w")
    #temp = open("COM.txt","w")
    start = sys.argv[1]
    end = sys.argv[2]
    if end == "END" or end == "end":
        end = len(ht)
    end = int(end)
    k = int(start)
    while k < end:  # end = len(ht)
        mutant = []
        mu = ht[k].split()
        check1 = mu[3].strip("\n")
        check2 = mu[4].strip("\n")
        pos = mu[2].strip("\n")
        # count3 != 0 means one of the two residue letters is C; the
        # structure holding that cysteine is the one inspected
        count3 = 0
        if check1 == "C":
            pdbid = mu[0].strip("\n")
            pdb = pdbid[0:4]  # PDB NAME
            C = pdbid[5:6]  # CHAIN
            count3 = count3 + 1
        elif check2 == "C":
            pdbid = mu[1].strip("\n")
            pdb = pdbid[0:4]  # PDB NAME
            C = pdbid[5:6]  # CHAIN
            count3 = count3 + 1
        if count3 != 0:
            print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))
            # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE
            #cc = 0
            #if cc == 0:
            try:
                fol = pdb[1:3]
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
                tar = gzip.open("{}".format(pdbfile), "rb")
                out = open("pdbprocess{}.cif".format(start), "wb")
                out.write(tar.read())
                tar.close()
                out.close()
                structure_id = "{}".format(pdb)
                filename = "pdbprocess{}.cif".format(start)
                structure = parser.get_structure(structure_id, filename)
                model = structure[0]
                chain = model["{}".format(C)]
                c1 = chain.get_list()  # LIST ALL THE RESIDUES
                k1 = 0
                # parallel lists over the cysteines that have both CB and SG:
                # com_list holds CB coordinates, com_list1 SG coordinates
                resid_list = []
                resname_list = []
                com_list = []
                com_list1 = []
                while k1 < len(c1):
                    c2 = c1[k1].get_id()
                    resid = c2[1]
                    if c2[0] == " ":
                        residue = chain[c2]
                        tresname = residue.get_resname()
                        try:
                            resname = tto("{}".format(tresname))
                        except:
                            resname = "X"
                        if resname == "C":
                            r1 = residue.get_list()  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE
                            k2 = 0
                            res = []  # ATOM NAMES
                            cd = []  # ATOM COORDINATES CB
                            cd1 = []  # ATOM COORDINATES SG
                            count = 0
                            while k2 < len(r1):
                                r2 = r1[k2].get_id()
                                # collect the CB and the SG atom
                                if r2 == "CB":
                                    res.append(r2)
                                    atom = residue['{}'.format(r2)]
                                    a1 = atom.get_coord()
                                    list1 = [a1[0], a1[1], a1[2]]
                                    cd.append(list1)
                                    count = count + 1
                                if r2 == "SG":
                                    res.append(r2)
                                    atom = residue['{}'.format(r2)]
                                    a1 = atom.get_coord()
                                    list1 = [a1[0], a1[1], a1[2]]
                                    cd1.append(list1)
                                    count = count + 1
                                k2 = k2 + 1
                            if count == 2:
                                resid_list.append(resid)
                                resname_list.append(resname)
                                list1 = [cd[0][0], cd[0][1], cd[0][2]]
                                com_list.append(list1)
                                list1 = [cd1[0][0], cd1[0][1], cd1[0][2]]
                                com_list1.append(list1)
                    k1 = k1 + 1
                # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE
                # after this loop count != 0 only if `pos` was found;
                # rcpos is then its index in the parallel lists
                k1 = 0
                count = 0
                while k1 < len(resid_list):
                    posc = resid_list[k1]
                    count = 0
                    if int(posc) == int(pos):
                        count = count + 1
                        rcpos = k1
                        k1 = len(resid_list)
                    k1 = k1 + 1
                if count != 0:
                    zresname = "{}".format(resname_list[rcpos])
                    v1 = com_list1[rcpos]  # SG of the mutated site
                    v11 = com_list[rcpos]  # CB of the mutated site
                    # COMPUTING THE DISTANCES
                    k3 = 0
                    list1 = []
                    while k3 < len(com_list1):
                        if k3 != rcpos:
                            v2 = com_list1[k3]  # SG of the candidate
                            v21 = com_list[k3]  # CB of the candidate
                            dis = (v1[0] - v2[0])**2 + (v1[1] - v2[1])**2 + (
                                v1[2] - v2[2])**2
                            if dis < (zdis * zdis):
                                # NOTE(review): the points are passed in the
                                # order SG, CB, SG', CB', and only positive
                                # angles in (75, 105) are accepted -- check
                                # against what dihedral() expects and returns
                                p = np.array([v1, v11, v2, v21])
                                ang = dihedral(p)
                                # DIHEDRAL CRITERIA
                                if ang > 75.0 and ang < 105.0:
                                    list1.append(resid_list[k3])
                        k3 = k3 + 1
                    if len(list1) > 0:
                        z.write("{} {} {} {} {} YES {}\n".format(
                            mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n"),
                            list1))
                        print("{} {} {} {} {} YES {}".format(
                            mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n"),
                            list1))
                    else:
                        z.write("{} {} {} {} {} NO\n".format(
                            mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
                else:
                    z.write("{} {} {} {} {} NO\n".format(
                        mu[0], mu[1], mu[2], mu[3], mu[4].strip("\n")))
            except:
                # bare except: every failure, not only a missing file, is
                # reported as "FILE NOT FOUND"
                print("FILE NOT FOUND")
                z.write("{} {} {} {} {} NO\n".format(mu[0], mu[1], mu[2],
                                                     mu[3],
                                                     mu[4].strip("\n")))
        else:
            #print("NOT CYSTINE")
            z.write("{} {} {} {} {} NO\n".format(mu[0], mu[1], mu[2], mu[3],
                                                 mu[4].strip("\n")))
        k = k + 1
    z.close()
def neighbours():
    """Write neighbour zones and backbone temperature factors of mutation sites.

    Input is ``distinct_mutants_only_cluster.txt``; each line is split on
    commas into an id (``id[0:4]`` PDB code, ``id[5:6]`` chain) followed by
    residue positions.  Lines ``sys.argv[1]`` .. ``sys.argv[2]`` - 1 are
    processed.  For every position four lines go to the file named by
    ``sys.argv[3]``: the id, the position followed by the numbers of all
    residues whose backbone centre (``COM`` of N, CA, C, O) lies within
    ``zdis``, and the matching values of ``TF[0]`` and ``TF[1]`` returned by
    ``temp_factor`` for the backbone B-factors (stored as avg_tf / max_tf).
    On any exception the id and three ``NA`` lines are written instead.

    ``COM`` and ``temp_factor`` are defined outside this block.

    NOTE(review): the original indentation was lost.  The four writes are
    placed after the ``if count != 0`` block, mirroring the PDB-format
    variant of this function; if they were inside it, positions missing
    from the structure produce no record at all.  Confirm.
    """
    import sys
    import re
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    parser = MMCIFParser(QUIET=True)
    from Bio.PDB.Polypeptide import three_to_one as tto
    # ZONE VARIABLE (cut-off distance, compared as a squared distance below)
    zdis = 10.0
    # PATH FOR THE PDB/mmCIF FILES
    pathmmcif = "/Users/tarun/Documents/mmCIF"
    #pathmmcif = "/bmm/data/pdbmmcif/data/structures/all/mmCIF"
    #pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"
    #pathPDB = "/bmm/home/tkhanna1/Documents/Database/First_10000/test_set"
    dis = open("distinct_mutants_only_cluster.txt", "r")
    ht = dis.readlines()
    dis.close()
    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION
    z = open("{}".format(sys.argv[3]), "w")
    #temp = open("COM.txt","w")
    start = sys.argv[1]
    end = int(sys.argv[2])
    k = int(start)
    while k < end:  # end = len(ht)
        mutant = []
        mu = ht[k].split(',')
        # NOTE(review): "\," is not a valid escape sequence; the backslash
        # is kept literally and newer Python versions warn about it.
        pdbid = mu[0].strip('[|\,|\'|]')
        pdb = pdbid[0:4]  # PDB NAME
        C = pdbid[5:6]  # CHAIN
        print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))
        mutant.append(pdbid)
        # EXECUTE THE CODE TO PICK UP THE DESIRED ZONE AROUND THE RESIDUE
        try:
            fol = pdb[1:3]
            pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
            tar = gzip.open("{}".format(pdbfile), "rb")
            out = open("pdbprocess{}.cif".format(start), "wb")
            out.write(tar.read())
            tar.close()
            out.close()
            structure_id = "{}".format(pdb)
            filename = "pdbprocess{}.cif".format(start)
            structure = parser.get_structure(structure_id, filename)
            model = structure[0]
            chain = model["{}".format(C)]
            c1 = chain.get_list()  # LIST ALL THE RESIDUES
            k1 = 0
            # parallel lists, one entry per residue whose id starts with " "
            resid_list = []
            resname_list = []
            com_list = []
            avg_tf = []
            max_tf = []
            while k1 < len(c1):
                c2 = c1[k1].get_id()
                resid = c2[1]
                if c2[0] == " ":
                    residue = chain[c2]
                    tresname = residue.get_resname()
                    try:
                        resname = tto("{}".format(tresname))
                    except:
                        resname = "X"
                    r1 = residue.get_list()  # LIST ALL THE ATOMS OF A PARTICULAR RESIDUE
                    k2 = 0
                    res = []  # ATOM NAMES
                    cd = []  # ATOM COORDINATES
                    bf = []  # B-FACTORS OF THE SAME ATOMS
                    while k2 < len(r1):
                        r2 = r1[k2].get_id()
                        # COM OF THE BACKBONE
                        if r2 == "CA" or r2 == "N" or r2 == "C" or r2 == "O":
                            res.append(r2)
                            atom = residue['{}'.format(r2)]
                            a1 = atom.get_coord()
                            tf = atom.get_bfactor()
                            cd.append(a1)
                            bf.append(tf)
                        k2 = k2 + 1
                    TF = temp_factor(bf)
                    CM = COM(res, cd)
                    resid_list.append(resid)
                    resname_list.append(resname)
                    com_list.append(CM)
                    avg_tf.append(TF[0])
                    max_tf.append(TF[1])
                k1 = k1 + 1
            #temp.write("{}".format(com_list))
            #temp.write("\n")
            # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE
            # mu[0] is the id, the positions start at index 1
            k1 = 1
            while k1 < len(mu):
                pos = mu[k1].strip(' |[|,|]|\'|\n')
                pos = int(pos)
                zres = "{}".format(pos)
                zresname = "NA"
                # count != 0 if `pos` exists in the chain; rcpos = its index
                k2 = 0
                count = 0
                while k2 < len(resid_list):
                    posc = int(resid_list[k2])
                    if posc == pos:
                        count = count + 1
                        rcpos = k2
                        k2 = len(resid_list)
                    k2 = k2 + 1
                if count != 0:
                    avgtf = "{}".format(avg_tf[rcpos])
                    maxtf = "{}".format(max_tf[rcpos])
                    v1 = com_list[rcpos]
                    # COMPUTING THE DISTANCES
                    k3 = 0
                    while k3 < len(com_list):
                        if k3 != rcpos:
                            v2 = com_list[k3]
                            dis = (v1[0] - v2[0])**2 + (v1[1] - v2[1])**2 + (
                                v1[2] - v2[2])**2
                            if dis < (zdis * zdis):
                                zres = zres + ",{}".format(resid_list[k3])
                                avgtf = avgtf + ",{}".format(avg_tf[k3])
                                maxtf = maxtf + ",{}".format(max_tf[k3])
                        k3 = k3 + 1
                # NOTE(review): with the writes at this level, avgtf/maxtf
                # are unbound (or left over from an earlier position) when
                # `pos` was not found -- see the docstring.
                z.write("{}".format(pdbid))
                z.write("\n")
                z.write("{}".format(zres))
                z.write("\n")
                z.write("{}".format(avgtf))
                z.write("\n")
                z.write("{}".format(maxtf))
                z.write("\n")
                k1 = k1 + 1
        except:
            # bare except: every failure, not only a missing file, is
            # reported as "FILE NOT FOUND"
            print("FILE NOT FOUND")
            z.write("{}".format(pdbid))
            z.write("\n")
            z.write("NA")
            z.write("\n")
            z.write("NA")
            z.write("\n")
            z.write("NA")
            z.write("\n")
        k = k + 1
    #temp.close()
    z.close()
def neighbours():
    """Collect distinct mutation sites and write the zone around each one.

    Command-line arguments (read from ``sys.argv``):
        argv[1]: file of mutants, one comma-separated record per line.
        argv[2]: file of mutations, line-aligned with argv[1].

    Phase 1 writes ``distinct_mutants.txt``: for every mutant line with more
    than one field, a list holding the first field followed by the distinct
    values obtained by dropping the first and last character of each
    mutation token (all tokens after the first field of the mutation line).
    Lines with a single field produce an empty list.

    Phase 2 re-reads that file and, for the last (at most) ten records,
    loads ``pdb<code>.ent.gz`` from ``pathPDB``, computes the backbone
    centre of every standard residue of the chain with ``COM`` and writes
    to ``zone.txt``, per residue number:
        1. the entry id,
        2. the residue number followed by the ids of residues whose centre
           is within ``zdis`` of it,
        3. the one-letter names of those residues (``NA`` when the residue
           number is not in the chain).
    The list of centres of each structure is appended to ``COM.txt``.  When
    a structure cannot be processed, two ``NA`` lines are written.

    ``structure.txt`` is created (or truncated) and left empty, as before.
    """
    import gzip
    import sys

    from Bio.PDB.PDBParser import PDBParser
    from Bio.PDB.Polypeptide import three_to_one as tto

    parser = PDBParser(PERMISSIVE=0, QUIET=True)
    backbone = ("CA", "N", "C", "O")

    # ZONE VARIABLE (radius of the neighbourhood, compared as a square below)
    zdis = 10.0

    # PATH FOR THE PDB FILES
    pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"

    # Only this many trailing records are processed in phase 2.
    tail = 10

    with open("{}".format(sys.argv[1]), "r") as f:  # MUTANTS
        ft = f.readlines()
    with open("{}".format(sys.argv[2]), "r") as g:  # MUTATIONS
        gt = g.readlines()

    # The original opened this file for writing and never used it; keep the
    # side effect of creating/truncating it, but do not leak the handle.
    open("structure.txt", "w").close()

    # DETERMINING THE DISTINCT MUTATIONS AND MUTANTS
    with open("distinct_mutants.txt", "w") as distinct:
        for k, line in enumerate(ft):
            dis_mut = []
            mu = line.split(",")
            mut = gt[k].split(",")
            if len(mu) > 1:
                dis_mut.append(mu[0])
                for entry in mut[1:]:
                    token = entry.strip("\n")
                    mut_res = token[1:len(token) - 1]
                    # Index 0 holds the entry id and is not a candidate.
                    if mut_res not in dis_mut[1:]:
                        dis_mut.append(mut_res)
            distinct.write("{}".format(dis_mut))
            distinct.write("\n")

    with open("distinct_mutants.txt", "r") as distinct:
        ht = distinct.readlines()

    # DETERMINING THE ZONE AROUND THE MUTANT SITE TO DETERMINE ANY
    # STRUCTURAL CHANGE TAKING PLACE DUE TO THE MUTATION
    with open("zone.txt", "w") as z, open("COM.txt", "w") as temp:
        # Clamp at 0: with fewer than `tail` records a negative start index
        # would wrap around (duplicate records) or raise IndexError.
        first = max(0, len(ht) - tail)
        for k in range(first, len(ht)):
            mu = ht[k].split(",")
            # Same character set as the original pattern, written without
            # the invalid "\," escape sequence.
            pdbid = mu[0].strip("[|\\,']")
            pdb = pdbid[0:4]  # PDB NAME
            chain_id = pdbid[5:6]  # CHAIN
            print("*** {} :: {} of {} ***".format(pdb, k, len(ht)))

            try:
                pdbfile = "{}/pdb{}.ent.gz".format(pathPDB, pdb)
                with gzip.open(pdbfile, "rb") as src, \
                        open("pdbprocess.pdb", "wb") as dst:
                    dst.write(src.read())

                structure = parser.get_structure(
                    "{}".format(pdb), "pdbprocess.pdb")
                chain = structure[0]["{}".format(chain_id)]

                resid_list = []
                resname_list = []
                com_list = []
                for residue in chain.get_list():
                    res_id = residue.get_id()
                    if res_id[0] != " ":
                        # Skip hetero residues and waters.
                        continue
                    try:
                        resname = tto("{}".format(residue.get_resname()))
                    except KeyError:
                        # Non-standard residue name.
                        resname = "X"
                    names = []  # ATOM NAMES
                    coords = []  # ATOM COORDINATES
                    for atom in residue.get_list():
                        name = atom.get_id()
                        if name in backbone:
                            names.append(name)
                            coords.append(atom.get_coord())
                    centre = COM(names, coords)
                    resid_list.append(res_id[1])
                    resname_list.append(resname)
                    com_list.append(centre)

                temp.write("{}".format(com_list))
                temp.write("\n")

                # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE
                for field in mu[1:]:
                    pos = int(field.strip(" |[,]'\n"))
                    z.write("{}".format(pdbid))
                    z.write("\n")

                    # First residue whose number matches the position.
                    rcpos = None
                    for idx, resid in enumerate(resid_list):
                        if int(resid) == pos:
                            rcpos = idx
                            break

                    zone = ["{}".format(pos)]
                    zresname = "NA"
                    if rcpos is not None:
                        zone_names = ["{}".format(resname_list[rcpos])]
                        v1 = com_list[rcpos]
                        for idx, v2 in enumerate(com_list):
                            if idx == rcpos:
                                continue
                            dist2 = ((v1[0] - v2[0]) ** 2
                                     + (v1[1] - v2[1]) ** 2
                                     + (v1[2] - v2[2]) ** 2)
                            if dist2 < (zdis * zdis):
                                zone.append("{}".format(resid_list[idx]))
                                zone_names.append(
                                    "{}".format(resname_list[idx]))
                        zresname = "".join(zone_names)

                    z.write("{}".format(",".join(zone)))
                    z.write("\n")
                    z.write("{}".format(zresname))
                    z.write("\n")
            except Exception as exc:
                # Best effort: keep the batch running and emit an NA record,
                # but report what actually went wrong.
                if isinstance(exc, (IOError, OSError)):
                    print("FILE NOT FOUND")
                else:
                    print("ERROR processing {}: {!r}".format(pdbid, exc))
                z.write("NA")
                z.write("\n")
                z.write("NA")
                z.write("\n")
def neighbours():
    """Write the residues around a mutation site for radii 4 to 10.

    Command-line arguments (read from ``sys.argv``):
        argv[1]: input file, one whitespace-separated record per line.
        argv[2]: index of the first line to process.
        argv[3]: index one past the last line to process, or ``END``/``end``
            for the whole file.
        argv[4]: output file name without the ``.txt`` suffix.

    Input fields used per line: [0] entry id of the structure that is
    loaded (characters 0-3 PDB code, character 5 chain id), [1] a second
    entry id that is only echoed, [2] integer residue number, [3] and [4]
    two residue labels that are only echoed.
    NOTE(review): going by the variable names, [0]/[3] are presumably the
    wild type and [1]/[4] the mutant -- confirm against the input producer.

    For each line whose residue number is present in the chain, one output
    line is written: PDB code, chain, second PDB code, second chain, the
    residue number, then for each integer radius 4..10 a comma-separated
    list (residue number first) of residues whose backbone centre, as
    returned by ``COM``, is closer than that radius, and finally the two
    residue labels.  Lines whose residue number is missing from the chain
    produce no output; failures are reported on stdout only.
    """
    import gzip
    import sys

    from Bio.PDB.MMCIFParser import MMCIFParser

    parser = MMCIFParser(QUIET=True)
    backbone = ("CA", "N", "C", "O")

    # PATH FOR THE PDB/mmCIF FILES
    pathmmcif = "/Volumes/BIOINFO/mmCIF"

    with open("{}".format(sys.argv[1]), "r") as f:
        ft = f.readlines()

    with open("{}.txt".format(sys.argv[4]), "w") as z:
        start = sys.argv[2]
        end = sys.argv[3]
        if end == "END" or end == "end":
            end = len(ft)
        end = int(end)
        # The scratch file is keyed on the start index so that several
        # slices of the input can be processed side by side.
        scratch = "pdbprocess{}.cif".format(start)

        for k in range(int(start), end):
            mu = ft[k].split()
            pdbid = mu[0]
            pdb = pdbid[0:4]  # PDB NAME
            chain_id = pdbid[5:6]  # CHAIN
            pdbidmut = mu[1]
            pdbmut = pdbidmut[0:4]
            chain_mut = pdbidmut[5:6]
            reswt = mu[3]
            resmut = mu[4].strip("\n")
            print("*** {} :: {} of {} ***".format(pdb, k, end))

            try:
                pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, pdb[1:3], pdb)
                with gzip.open(pdbfile, "rb") as src, \
                        open(scratch, "wb") as dst:
                    dst.write(src.read())

                structure = parser.get_structure("{}".format(pdb), scratch)
                chain = structure[0]["{}".format(chain_id)]

                resid_list = []
                com_list = []
                for residue in chain.get_list():
                    res_id = residue.get_id()
                    if res_id[0] != " ":
                        # Skip hetero residues and waters.
                        continue
                    names = []  # ATOM NAMES
                    coords = []  # ATOM COORDINATES
                    for atom in residue.get_list():
                        name = atom.get_id()
                        if name in backbone:
                            names.append(name)
                            coords.append(atom.get_coord())
                    centre = COM(names, coords)
                    resid_list.append(res_id[1])
                    com_list.append(centre)

                # DETERMINING THE ZONE AROUND THE MUTATED RESIDUE
                pos = int(mu[2].strip("\n"))

                # First residue whose number matches the position.
                rcpos = None
                for idx, resid in enumerate(resid_list):
                    if int(resid) == pos:
                        rcpos = idx
                        break

                if rcpos is not None:
                    v1 = com_list[rcpos]
                    # Squared distances do not depend on the radius, so they
                    # are computed once and reused for every radius.
                    sq_dist = []
                    for idx, v2 in enumerate(com_list):
                        if idx == rcpos:
                            continue
                        dist2 = ((v1[0] - v2[0]) ** 2
                                 + (v1[1] - v2[1]) ** 2
                                 + (v1[2] - v2[2]) ** 2)
                        sq_dist.append((resid_list[idx], dist2))

                    fields = [pdb, chain_id, pdbmut, chain_mut, pos]
                    for zdis in range(4, 11, 1):
                        zone = ["{}".format(pos)]
                        for resid, dist2 in sq_dist:
                            if dist2 < (zdis * zdis):
                                zone.append("{}".format(resid))
                        fields.append(",".join(zone))
                    fields.append(reswt)
                    fields.append(resmut)
                    z.write(
                        " ".join("{}".format(item) for item in fields) + "\n")
            except Exception as exc:
                # Best effort: keep the batch running, but report what
                # actually went wrong.
                if isinstance(exc, (IOError, OSError)):
                    print("FILE NOT FOUND")
                else:
                    print("ERROR processing {}: {!r}".format(pdbid, exc))