def load_chains(raw, pdb_id, pdb_type, known):
    """Collect unit ids and sequences of the known residues, chain by chain.

    ``raw`` is parsed with ``PDBParser`` under the id ``pdb_id``.  Every
    residue whose stripped ``resname`` is a key of ``known`` gets a unit id:
    the structure id, ``pdb_type``, model id, chain id, residue number,
    residue name and insertion code, each stringified, stripped and joined
    with ``_``.

    Returns a dict holding an ``'ordering'`` list with every unit id in
    traversal order and, for each chain id with at least one known residue,
    ``{'residues': [unit ids], 'sequence': str}`` where the sequence is the
    concatenation of ``known[name]``.  A chain id that occurs in several
    models is overwritten by the later model.
    """
    structure = PDBParser().get_structure(pdb_id, raw)
    data = {'ordering': []}
    for model in structure:
        for chain in model:
            chain_id = chain.get_id()
            entry = data[chain_id] = {'residues': [], 'sequence': []}
            for residue in chain:
                name = residue.resname.strip()
                if name not in known:
                    continue
                res_id = residue.get_id()
                parts = (structure.get_id(), pdb_type, model.get_id(),
                         chain_id, res_id[1], residue.resname, res_id[2])
                unit_id = '_'.join([str(part).strip() for part in parts])
                entry['residues'].append(unit_id)
                entry['sequence'].append(known[name])
                data['ordering'].append(unit_id)
            if entry['residues']:
                entry['sequence'] = ''.join(entry['sequence'])
            else:
                # Chains without any known residue are not reported.
                del data[chain_id]
    return data
def get_aa_residues(pdb, chain):
    """Return the non-hetero residues of one chain of a PDB file.

    pdb: Protein Data Bank file.
    chain: Id of the chain to read from the first model of the file.

    A residue is kept when the first field of its Biopython id (the hetero
    flag) is a blank, i.e. it is neither a HETATM residue nor a water.

    returns: List of Biopython PDB Residue objects, in chain order.
    """
    structure = PDBParser().get_structure("prot", pdb)
    selected_chain = structure[0][chain]
    # Filter in a single pass; the previous build-then-list.remove approach
    # rescanned the list for every rejected residue.
    return [
        residue
        for residue in selected_chain.get_residues()
        if residue.get_id()[0] == " "
    ]
def main(file, atom, CAd=15, CBd=12, mind=6):
    """Analyze the pdb using distance between atom and minimum distances.

    file: path of the PDB file to analyse.
    atom: atom selector passed on to ``calc_dist_matrix``, ``contact_map``
        and the plotting helpers.
    CAd, CBd, mind: thresholds stored under the keys ``"CA"``, ``"CB"`` and
        ``"min"`` of the dict given to ``contact_map``.

    Plots a heatmap of the distance matrix and a binary contact map, logging
    the name returned by each plotting helper.

    returns: tuple ``(dist_matrix, cont_matrix)``.
    """
    logging.info("Analyzing %s using %s", file, atom)
    dist = {"CA": CAd, "CB": CBd, "min": mind}
    # Name the outputs after the ``file`` argument.  This used to read the
    # module-level ``args.file``, which only exists when run as a script.
    name_f = os.path.splitext(os.path.basename(file))[0]
    parser = PDBParser(PERMISSIVE=1)
    # Route warnings raised while parsing and plotting into logging, and
    # always restore the previous behaviour, even on failure.
    logging.captureWarnings(True)
    try:
        structure = parser.get_structure("test", file)
        residues = filter_residues(structure)
        dist_matrix = calc_dist_matrix(residues, atom)
        title_dist = 'Distances of the file {}'.format(name_f)
        name_heatmap = plots.plot_heatmap(dist_matrix, name_f, title_dist,
                                          atom)
        logging.info("Heatmap %s created", name_heatmap)
        cont_matrix = contact_map(dist_matrix, atom, dist)
        title_bin = 'Distance contacts of the file {}'.format(name_f)
        name_bin = plots.plot_matrix_binary(cont_matrix, name_f, title_bin,
                                            atom)
        logging.info("Contact map %s created", name_bin)
    finally:
        logging.captureWarnings(False)
    return dist_matrix, cont_matrix
def get_normalized_pairs(n):
    '''Count residue-name pairs whose CB atoms lie within radius n.

    Every ``./pdbfiles/*.ent`` file is parsed and its CB atoms are searched
    for neighbours within ``n``.  Each neighbouring pair adds one to the
    count of ``(resname_x, resname_y)``, accumulated over all files.  Files
    with two or fewer CB atoms are skipped and reported on stderr.

    The counts are written to ``normalized_pairs8.py`` as
    ``Normalized_pairs_<n>=<dict>`` and also returned.  According to the
    original author the reference set is about 1.110 structures with <40%
    homology, and this function is not required by the package itself.
    '''
    p = PDBParser(PERMISSIVE=1)
    counter = Counter()
    file_list = []
    for filename in glob.glob('./pdbfiles/*.ent'):
        s = p.get_structure('X', filename)
        atom_list = [atom for atom in s.get_atoms() if atom.name == 'CB']
        if len(atom_list) > 2:
            ns = Bio.PDB.NeighborSearch(atom_list)
            # Accumulate across files.  The pair list used to be rebuilt
            # from the most recent search only, so earlier files were lost.
            counter.update(
                (x.get_parent().get_resname(), y.get_parent().get_resname())
                for x, y in ns.search_all(n)
            )
            file_list.append(filename)
            sys.stderr.write(filename+' processed.\n')  # check-point
        else:
            sys.stderr.write(filename+' could not be processed.\n')  # check-point
    counter = dict(counter)
    sys.stderr.write(str(len(file_list))+' files processed.\n')  # check-point
    sys.stderr.write('Dictionary length: '+str(len(counter))+'.\n')  # check-point
    with open('normalized_pairs8.py', 'w') as outfile:
        outfile.write('\nNormalized_pairs_'+str(n)+'='+str(counter))
    return counter
def Init():
    """Read ``task.input`` and build detector vectors and atom coordinates.

    ``task.input`` holds ``key=value`` lines; lines starting with ``/`` and
    empty lines are skipped.  ``angle`` lines are parsed as comma-separated
    floats (collected but not returned); every other key goes into a
    parameter dict.  Keys read here: ``protein_file``, ``CENTER``,
    ``scr_size``, ``pix_size``, ``distance`` and ``lambda``.

    Coordinates come from the ``_atom_site.Cartn_*`` columns of a ``.cif``
    file or from the atoms of a ``.pdb`` file.  With ``CENTER`` equal to
    ``ON`` the per-axis mean is subtracted.

    Returns ``(s, allarr)``: ``s`` has shape ``(scr_size, scr_size, 3)``
    with one vector per pixel, and ``allarr`` holds one xyz row per atom.

    Raises ``ValueError`` when the protein file is neither cif nor pdb.
    """
    para = {}
    jobs = []
    with open("task.input", "r") as ptask:
        for line in ptask:
            if line[0] == '/' or line[0] == '\n':
                continue
            # Split on the first '=' only so values may contain '='.
            a, b = line.split("=", 1)
            if a == 'angle':
                jobs.append([float(x) for x in b.strip().split(',')])
            else:
                para[a] = b.strip()

    filename = para['protein_file']
    # Use the text after the last dot so paths such as './data/x.pdb' work.
    file_type = filename.strip().rpartition('.')[2]
    if file_type == 'cif':
        mt = MMCIF2Dict(filename)
        xlist = [float(x) for x in mt['_atom_site.Cartn_x']]
        ylist = [float(x) for x in mt['_atom_site.Cartn_y']]
        zlist = [float(x) for x in mt['_atom_site.Cartn_z']]
        allarr = numpy.vstack((xlist, ylist, zlist)).T
    elif file_type == 'pdb':
        structure = PDBParser().get_structure("test", filename)
        allarr = numpy.array(
            [atom.get_coord() for atom in structure.get_atoms()])
    else:
        # Fail early instead of hitting an undefined name further down.
        raise ValueError(
            "Unsupported protein file type %r (expected cif or pdb)"
            % file_type)

    if para['CENTER'] == 'ON':
        allarr -= allarr.mean(axis=0)

    scr_size = int(para['scr_size'])
    pix_size = float(para['pix_size'])
    distance = float(para['distance'])
    wavenum = 1.0/float(para['lambda'])
    ssc = scr_size/2.0-0.5

    # Same arithmetic as a per-pixel double loop, evaluated on whole arrays.
    offsets = (numpy.arange(scr_size) - ssc) * pix_size
    x, y = numpy.meshgrid(offsets, offsets, indexing='ij')
    z = distance
    sr = numpy.sqrt(x*x + y*y + z*z)
    s = numpy.zeros((scr_size, scr_size, 3))
    s[:, :, 0] = x*wavenum/sr
    s[:, :, 1] = y*wavenum/sr
    s[:, :, 2] = z*wavenum/sr - wavenum
    return s, allarr
def getPdbSequance(pdb_file, chain_id):
    """Return the sequence and residue numbers of one chain of a PDB file.

    Only the first model is read.  A residue is used when its hetero flag
    is blank, ``H_MSE``, ``H_M3L`` or ``H_CAS`` and it has N, CA and C atoms
    plus a CB atom (CB is not required for GLY).  Other residues are
    reported on stdout unless their flag is ``W``.

    If the chain is missing, a message is printed and ``exit()`` is called.

    Returns ``(pdb_seq, pdb_indexes)``: the result of ``three2one`` on the
    collected residue names, and the matching residue numbers.
    """
    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    structure = PDBParser(PERMISSIVE=1).get_structure("", pdb_file)
    pdb_id = pdb_file[0:-4]
    first_model = structure[0]
    if not first_model.has_id(chain_id):
        print("PDB "+pdb_id+" doesn't have chain with id "+chain_id)
        print("")
        exit()

    pdb_sequance = []
    pdb_indexes = []
    for res in first_model[chain_id]:
        has_backbone = (res.has_id('N') and res.has_id('CA')
                        and res.has_id('C'))
        has_side_chain = res.get_resname() == 'GLY' or res.has_id('CB')
        het_flag = res.get_id()[0]
        if het_flag in accepted_flags and has_backbone and has_side_chain:
            pdb_sequance.append(res.get_resname())
            pdb_indexes.append(res.get_id()[1])
        elif het_flag != 'W':
            print("Unknown residue in "+pdb_id+" with res_id "+het_flag)
    return three2one(pdb_sequance), pdb_indexes
def deleteChain():
    """Interactively delete whole chains from ``pdb_name`` and save a copy.

    Asks how many chains to delete, then for each chain id, and removes that
    chain from every model that has it.  The result is written either to
    ``<lowercase peptide sequence>_bound.pdb`` (answer ``y`` to the last
    prompt) or to ``<name>_without<last chain id>.pdb``.

    Reads the module-level ``pdb_name``.  Raises ``ValueError`` if the chain
    count is not an integer.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    # int(raw_input()) rather than input(): Python 2's input() evaluates
    # whatever the user types as an expression.
    nb_chain = int(raw_input('How many chain do you want to delete : '))
    # Defined up front so the output name exists even when nothing is asked.
    rm_chain = ''
    for _ in range(nb_chain):
        rm_chain = raw_input('What chain you want to delete : ')
        for model in structure:
            # Look the chain up by id rather than detaching it while
            # iterating over the model's children.
            if model.has_id(rm_chain):
                model.detach_child(rm_chain)

    pept = raw_input('Do you want to get a pdb with the sequence in its name : ')
    if pept == 'y':
        ppb = PPBuilder()
        seq = ''.join(
            [str(pp.get_sequence()) for pp in ppb.build_peptides(structure)]
        ).lower()
        out_name = seq+'_bound.pdb'
    else:
        out_name = nameStruct+'_without'+rm_chain+'.pdb'

    w = PDBIO()
    w.set_structure(structure)
    w.save(out_name)
def __pdb_ordering__(self, raw, pdb_id, pdb_type):
    """Map each known unit to its position in the structure.

    Returns a dict of the form ``{unit_id: {'index': index, 'pdb': pdb_id}}``
    for every residue whose stripped name is in ``self.known``.  The unit id
    joins, with ``_``, the structure id, ``pdb_type``, model id plus one,
    chain id, residue number, residue name and insertion code.  ``index``
    counts matching residues from zero in traversal order.
    """
    structure = PDBParser(QUIET=True).get_structure(pdb_id, raw)
    data = {}
    index = 0
    for model in structure:
        model_id = model.get_id() + 1
        for chain in model:
            chain_id = chain.get_id()
            for residue in chain:
                name = residue.resname.strip()
                if name not in self.known:
                    continue
                res_id = residue.get_id()
                fields = (structure.get_id(), pdb_type, model_id, chain_id,
                          res_id[1], name, res_id[2])
                unit_id = '_'.join([str(field).strip() for field in fields])
                data[unit_id] = {'index': index, 'pdb': pdb_id}
                index += 1
    return data
def old_residue_ids(raw, filename):
    """List an id record for every residue found in ``raw``.

    The PDB id is the base name of ``filename`` without its extension.  The
    type is ``'AU'`` for a ``.pdb`` extension, otherwise ``'BA'`` followed
    by the last character of ``filename``.

    Returns a list of dicts with the keys ``pdb``, ``type``, ``model``,
    ``chain``, ``number``, ``unit`` and ``insertion``; model, number and
    insertion are strings.
    """
    stem, ext = os.path.splitext(filename)
    pdb_id = os.path.basename(stem)
    structure = PDBParser().get_structure(pdb_id, raw)
    pdb_type = 'AU' if ext == '.pdb' else 'BA' + filename[-1]

    data = []
    for model in structure:
        # BioPython numbers models from 0; the ids reported here start at 1.
        model_id = str(model.get_id() + 1)
        for chain in model:
            chain_id = chain.get_id()
            data.extend([
                {
                    'pdb': pdb_id,
                    'type': pdb_type,
                    'model': model_id,
                    'chain': chain_id,
                    'number': str(residue.get_id()[1]),
                    'unit': residue.resname.strip(),
                    'insertion': residue.get_id()[2].rstrip(),
                }
                for residue in chain
            ])
    return data
def get_info(filename):
    '''Return the header of a PDB file.

    Function adapted from Biopython Package.

    get_info(filename)
    Filename needs to be a PDB file format (*.ent or *.pdb).

    returns: the header object reported by ``PDBParser.get_header()`` after
    parsing ``filename``.
    '''
    p = PDBParser(QUIET=True)
    # The parser has no header until a file has been parsed.  Previously
    # nothing was parsed and nothing was returned.
    p.get_structure('X', filename)
    return p.get_header()
def score(PDBfile):
    """Calculate the m-score for a given PDB file.

    arguments:
        PDBfile - the PDB file to score

    hidden arguments:
        aas.scr, pro.scr, gly.scr - the scoring tables need to be present
        in the working directory (loaded through ``load_scores``)

    Only the first chain of the first model is scored.  Proline and glycine
    indices are scored with their own tables, with indices 1 and
    ``len(chain) - 1`` dropped first; the remaining indices use the general
    table.  The sum is divided by ``length(chain)`` and multiplied by 1000.

    returns: the normalized score, or 0 when the PRO/GLY indices do not fit
    the residue numbering or the computed length is zero.
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores()  # scoring tables

    # Drop a trailing '.pdb' to get the structure id.  str.rstrip('.pdb')
    # would strip any run of '.', 'p', 'd', 'b' characters instead.
    if PDBfile.endswith('.pdb'):
        structure_id = PDBfile[:-len('.pdb')]
    else:
        structure_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(structure_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]
    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    last = len(chain) - 1
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[last].id[1]))

    # need to remove pro/gly indices in first/last position
    for index_list in (pro_list, gly_list):
        for terminal in (1, last):
            if terminal in index_list:
                index_list.remove(terminal)

    try:
        for index in pro_list:
            aas_list.remove(index)  # remove pros from aas_list
        for index in gly_list:
            aas_list.remove(index)  # remove glys from aas_list
    except ValueError:
        print('incosistency in PDB file - will return score = 0')
        return 0

    proscore = score_help(chain, pro_list, pro)
    glyscore = score_help(chain, gly_list, gly)
    aasscore = score_help(chain, aas_list, aas)
    total = proscore+glyscore+aasscore
    size = length(chain)
    try:
        return (total/size)*1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def get_structure(pdb_id):
    '''Return the parsed PDB structure for ``pdb_id``.

    The local path comes from ``io_utils.get_file``, which is given the RCSB
    URL and a target file under ``~/<_DIR>/<_PDB_DIR>/``.  That path is
    opened and parsed quietly.
    '''
    file_name = pdb_id + '.pdb'
    source_url = 'http://www.rcsb.org/pdb/files/' + file_name
    target_filename = os.path.join(os.path.expanduser('~'), _DIR, _PDB_DIR,
                                   file_name)
    local_path = io_utils.get_file(source_url, target_filename)
    with open(local_path) as pdb_file:
        return PDBParser(QUIET=True).get_structure(pdb_id, pdb_file.name)
def main():
    """Run ``calculate_ss`` on the first model of the PDB file given in argv.

    Exits with a usage message when no input file is supplied.
    """
    arguments = sys.argv[1:]
    if not arguments:
        sys.exit("Usage: %s input_pdb_file" % sys.argv[0])
    structure = PDBParser(PERMISSIVE=1).get_structure("temp", arguments[0])
    calculate_ss(structure[0])
def parse(self, *pdb_filenames):
    """REQUIRED. Load the protein PDB files into ``self.proteins``.

    Any number of file names may be given, but only two will be used for the
    superimposition.  Each call replaces the previous list; the PDB id of a
    structure is derived from its file name.
    """
    quiet_parser = PDBParser(QUIET=True)
    self.proteins = []  # start from an empty list on every call
    for path in pdb_filenames:
        structure_id = self.__get_pdb_id_from_filename(path)
        self.proteins.append(quiet_parser.get_structure(structure_id, path))
def removeDoubleAtoms():
    """Remove double (disordered) atoms and save ``<name>_noDouble.pdb``.

    Reads the module-level ``pdb_name``; ``<name>`` is the part of it before
    the first dot.
    """
    base_name = pdb_name.partition('.')[0]
    pdb_parser = PDBParser()
    structure = pdb_parser.get_structure(base_name, pdb_name)
    # Header and trailer are requested but their values are not used.
    pdb_parser.get_header()
    pdb_parser.get_trailer()
    structure.remove_disordered_atoms()
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(base_name + '_noDouble.pdb')
def Draw(self, parent, filename):
    """Plot backbone segments of the first model on ``parent.ax2``.

    For every residue with more than three atoms, the first four atoms are
    used to build three segments: atom 0 to atom 1 (red), atom 1 to atom 2
    (blue) and atom 2 to atom 3 (green).  From the third residue of a chain
    on, the atom-0 position of the previously drawn residue is appended to
    the blue segment at index ``resnum - 2``.  Presumably the first four
    atoms are N, CA, C and O - TODO confirm against the input files.

    The parsed model list is kept in ``self.pdbMat``.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure("WHYY", filename)
    self.pdbMat = structure.get_list()

    def segment(start, end):
        # One line segment stored as [xs, ys, zs].
        return [[start[0], end[0]], [start[1], end[1]], [start[2], end[2]]]

    red, blue, green = [], [], []
    previous_n = None
    for chain in self.pdbMat[0].get_list():
        for resnum, residue in enumerate(chain.get_list()):
            atoms = residue.get_list()
            if len(atoms) <= 3:
                continue
            if resnum > 1:
                lagged = blue[resnum - 2]
                for axis in range(3):
                    lagged[axis].append(previous_n[axis])
            previous_n = atoms[0].get_coord()
            ca_pos = atoms[1].get_coord()
            c_pos = atoms[2].get_coord()
            o_pos = atoms[3].get_coord()
            red.append(segment(previous_n, ca_pos))
            blue.append(segment(ca_pos, c_pos))
            green.append(segment(c_pos, o_pos))

    for segments, line_format in ((red, "r-"), (blue, "b-"), (green, "g-")):
        for xs, ys, zs in segments:
            parent.ax2.plot(np.array(xs), np.array(ys), np.array(zs),
                            line_format, linewidth=5)
def Pdb2Gro(pdb_file, gro_file, ch_name):
    """Write the atoms of one chain of a PDB file to ``gro_file``.

    pdb_file: path of the input; ``.pdb`` is appended when missing.
    gro_file: path of the output file.
    ch_name: id of the chain to export; an empty string selects ``'A'``.

    Only the first model is read.  A residue is exported when its hetero
    flag is blank, ``H_MSE``, ``H_M3L`` or ``H_CAS`` and it has N, CA and C
    atoms.  Atoms are numbered from 1 and written through ``Atom.write_``
    after a title line and an atom-count line.  If no chain matches,
    no file is written.
    """
    from Bio.PDB.PDBParser import PDBParser

    p = PDBParser(PERMISSIVE=1)
    pdb_id = pdb_file
    if pdb_file[-4:].lower() != ".pdb":
        pdb_file = pdb_file + ".pdb"
    if pdb_id[-4:].lower() == ".pdb":
        pdb_id = pdb_id[:-4]

    s = p.get_structure(pdb_id, pdb_file)
    if ch_name == '':
        ch_name = 'A'

    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    for chain in s[0].get_list():
        if chain.get_id() != ch_name:
            continue
        atoms = []
        iatom = 0
        for res in chain:
            has_backbone = (res.has_id('N') and res.has_id('CA')
                            and res.has_id('C'))
            if res.get_id()[0] in accepted_flags and has_backbone:
                res_name = res.get_resname()
                residue_no = res.get_id()[1]
                for atom in res:
                    iatom = iatom + 1
                    atoms.append(Atom(iatom, atom.get_name(), residue_no,
                                      res_name, atom.get_coord()))
        # 'with' closes the output even if one of the writes fails.
        with open(gro_file, 'w') as out:
            out.write(" Structure-Based gro file\n")
            out.write((" "+str(len(atoms)))[-12:])
            out.write("\n")
            for entry in atoms:
                entry.write_(out)
def get_ca(pdbfile):
    """Return the CA coordinates of every residue in the first model.

    Every residue of every chain must have a blank, ``H_MSE``, ``H_M3L`` or
    ``H_CAS`` hetero flag and carry both a CA and an O atom.  On the first
    residue that does not, a message is printed and the process exits with
    status 1.

    returns: list of CA coordinates in chain/residue order.
    """
    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    structure = PDBParser(PERMISSIVE=1).get_structure(pdbfile, pdbfile)
    ca_atoms = []
    for chain in structure[0].get_list():
        for res in chain:
            res_id = res.get_id()[0]
            is_regular_res = res.has_id('CA') and res.has_id('O')
            if res_id in accepted_flags and is_regular_res:
                ca_atoms.append(res['CA'].get_coord())
            else:
                print("Pdb file contains irregular residue names or missing CA / O atoms! Fix it and run again! Exit with error.")
                print("res_id : " + str(res_id))
                # Non-zero status so callers can detect the failure; a bare
                # sys.exit() reported success.
                sys.exit(1)
    return ca_atoms
def score (query_pdb_path, against_pdb_path,
           query_fp_path = None, against_fp_path = None,
           query_epitope = None, against_epitope = None,
           spin_image_height_step = 5, spin_image_radius_step = 2,
           sphere_radius_step = 2, cutoff = 20.0,
           spin_image_radius_range = (0, 20),
           spin_image_height_range = (-30, 10),
           sphere_radius_range = (0, 20),
           callback = write_score_to_file, cbargs = None):
    """Score the similarity between two PDB complexes.

    query_pdb_path, against_pdb_path: PDB files of the two complexes.
    query_fp_path, against_fp_path: optional fingerprint files.  When either
        is None, both fingerprints are computed from the structures using
        the three ``*_step`` arguments; otherwise both files are read.
    query_epitope, against_epitope: epitope lists handed to ``Complex``;
        ``None`` means an empty list.
    cutoff: forwarded to ``similarity_between``.
    spin_image_radius_range, spin_image_height_range, sphere_radius_range:
        accepted for compatibility; not used in this function.
    callback: called as ``callback((score1, score2, score3), *cbargs)``
        unless it is None.
    cbargs: extra positional arguments for ``callback``; ``None`` means
        no extra arguments.

    returns: tuple ``(score1, score2, score3)``.
    """
    # Fresh lists per call; list defaults in the signature would be shared
    # between calls.
    if query_epitope is None:
        query_epitope = []
    if against_epitope is None:
        against_epitope = []
    if cbargs is None:
        cbargs = []

    p = PDBParser(PERMISSIVE=1)
    query_struct = p.get_structure(os.path.basename(query_pdb_path),
                                   query_pdb_path)
    against_struct = p.get_structure(os.path.basename(against_pdb_path),
                                     against_pdb_path)
    query_complex = Complex(query_struct, query_epitope)
    against_complex = Complex(against_struct, against_epitope)

    if query_fp_path is None or against_fp_path is None:
        # if fp is not given, compute both fingerprints
        query_complex.get_fp(spin_image_radius_step = spin_image_radius_step,
                             spin_image_height_step = spin_image_height_step,
                             sphere_radius_step = sphere_radius_step)
        against_complex.get_fp(spin_image_radius_step = spin_image_radius_step,
                               spin_image_height_step = spin_image_height_step,
                               sphere_radius_step = sphere_radius_step)
        query_fp_string = query_complex.fp2str()
        against_fp_string = against_complex.fp2str()
    else:
        # if fp is given, read them
        with open(query_fp_path, 'r') as f1, open(against_fp_path, 'r') as f2:
            query_fp_string = f1.read()
            against_fp_string = f2.read()

    query = FPWithComplex(query_complex, query_fp_string)
    against = FPWithComplex(against_complex, against_fp_string)
    score1, score2, score3 = similarity_between(query, against,
                                                cutoff = cutoff)
    if callback is not None:
        callback((score1, score2, score3), *cbargs)
    return score1, score2, score3
def __init__(self, filename):
    """Load every atom of a PDB file as a coloured sphere.

    Each atom becomes a ``Sphere`` with its coordinates, a radius looked up
    in ``E2R`` (default 1.5) and a colour looked up in ``E2C`` (default
    0xFF1493), keyed by the atom name with leading and trailing digits
    stripped.  The packed floats of all spheres are appended to
    ``self.spheredata`` and the spheres are kept in ``self.spheres``.

    ``self.x``, ``self.y``, ``self.z`` hold the centre of the bounding box
    and ``self.radius`` half of its diagonal plus 1.5.
    """
    self.spheredata = ''
    E2C = {}
    E2R = {}
    # Runs the ``elements`` code defined at the bottom of this file; it is
    # expected to fill in the colour and radius tables above.
    exec(elements)

    # Read the file
    structure = PDBParser().get_structure('test', filename)
    atoms = [
        atom
        for model in structure.get_list()
        for chain in model.get_list()
        for residue in chain.get_list()
        for atom in residue.get_list()
    ]

    # Look up colors and radius
    spheres = []
    for atom in atoms:
        s = Sphere()
        s.x, s.y, s.z = atom.get_coord()
        element = atom.get_name().strip(string.digits)
        s.radius = E2R.get(element, 1.5)
        color = E2C.get(element, 0xFF1493)
        s.r = (color & 0xff) / 255.0
        s.g = ((color & 0xff00) >> 8) / 255.0
        s.b = ((color & 0xff0000) >> 16) / 255.0
        spheres.append(s)
        self.spheredata += struct.pack('fff f ffff', s.x, s.y, s.z,
                                       s.radius, s.r, s.g, s.b, 1.0)
    self.spheres = spheres

    # Figure out the total radius
    xs = [sphere.x for sphere in spheres]
    ys = [sphere.y for sphere in spheres]
    zs = [sphere.z for sphere in spheres]
    max_x, min_x = max(xs), min(xs)
    max_y, min_y = max(ys), min(ys)
    max_z, min_z = max(zs), min(zs)
    dx = max_x - min_x
    dy = max_y - min_y
    dz = max_z - min_z
    self.radius = np.sqrt(dx*dx + dy*dy + dz*dz) / 2 + 1.5
    self.x = (max_x + min_x) / 2
    self.y = (max_y + min_y) / 2
    self.z = (max_z + min_z) / 2
def removeHetero():
    """Remove all hetero residues and save ``<name>_noHetero.pdb``.

    A residue is removed when the first field of its id (the hetero flag) is
    not a blank.  Chains left without residues are removed from their model.
    Reads the module-level ``pdb_name``; ``<name>`` is the part of it before
    the first dot.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    for model in structure:
        # Iterate over snapshots: detaching a child while iterating the live
        # child list skips the next element, which left every other one of
        # several consecutive hetero residues in place.
        for chain in list(model):
            for residue in list(chain):
                if residue.id[0] != ' ':
                    chain.detach_child(residue.id)
            if len(chain) == 0:
                model.detach_child(chain.id)

    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct+'_noHetero.pdb')
def renameChain():
    """Interactively rename a chain and save ``<name>_rename.pdb``.

    Asks for the current and the new chain id and applies the change in
    every model.  Reads the module-level ``pdb_name``; ``<name>`` is the
    part of it before the first dot.
    """
    base_name = pdb_name.partition('.')[0]
    pdb_parser = PDBParser()
    structure = pdb_parser.get_structure(base_name, pdb_name)
    # Header and trailer are requested but their values are not used.
    pdb_parser.get_header()
    pdb_parser.get_trailer()

    old_id = raw_input('What is the chain you want to rename : ')
    new_id = raw_input('What is the new name of this chain : ')
    for model in structure:
        for chain in model:
            if chain.id != old_id:
                continue
            chain.id = new_id

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(base_name + '_rename.pdb')
def deleteResidue():
    """Interactively delete a residue and save ``<name>_noResidue.pdb``.

    Asks for a residue number and removes every residue with that number
    from every chain of every model.  If the answer is not an integer, a
    message is printed and nothing is written.  Reads the module-level
    ``pdb_name``; ``<name>`` is the part of it before the first dot.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    answer = raw_input('What residue you want to delete : ')
    try:
        # Residue numbers in residue.id are integers; comparing them with
        # the raw answer string never matched, so nothing was deleted.
        rm_residue = int(answer)
    except ValueError:
        print('Residue number must be an integer, got: ' + answer)
        return

    for model in structure:
        for chain in model:
            # Snapshot the residues so detaching does not skip the next one.
            for residue in list(chain):
                if residue.id[1] == rm_residue:
                    chain.detach_child(residue.id)

    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct+'_noResidue.pdb')
def Draw(self, parent, filename):
    """Scatter-plot the CA, N and O atoms of the first model on ``parent.ax2``.

    Atoms whose id starts with ``H`` or ``W`` are ignored.  CA atoms are
    drawn in blue (size 385), N atoms in red (size 350) and O atoms in green
    (size 330); per the original notes each size is the element's radius
    times 5.  The parsed model list is kept in ``self.pdbMat``.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
    self.pdbMat = structure.get_list()

    # atom name -> (xs, ys, zs)
    coords = {'CA': ([], [], []), 'N': ([], [], []), 'O': ([], [], [])}
    for chain in self.pdbMat[0].get_list():
        for residue in chain.get_list():
            for atom in residue.get_list():
                if atom.get_id()[0][0] in ["H", "W"]:
                    continue
                pos = atom.get_coord()
                bucket = coords.get(atom.get_name())
                if bucket is None:
                    continue
                bucket[0].append(pos[0])
                bucket[1].append(pos[1])
                bucket[2].append(pos[2])

    for atom_name, size, colour in (('CA', 385, 'b'), ('N', 350, 'r'),
                                    ('O', 330, 'g')):
        xs, ys, zs = coords[atom_name]
        parent.ax2.scatter(np.array(xs), np.array(ys), np.array(zs),
                           zdir='z', marker='o', s=size, c=colour)
def Draw(self, parent, filename):
    """Plot the CA trace of the first model as one line on ``parent.ax2``.

    The parsed model list is kept in ``self.pdbMat``.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
    self.pdbMat = structure.get_list()
    ca_positions = [
        atom.get_coord()
        for chain in self.pdbMat[0].get_list()
        for residue in chain.get_list()
        for atom in residue.get_list()
        if atom.get_name() == 'CA'
    ]
    x = np.array([pos[0] for pos in ca_positions])
    y = np.array([pos[1] for pos in ca_positions])
    z = np.array([pos[2] for pos in ca_positions])
    parent.ax2.plot(x, y, z)
def getSequence():
    """Interactively print the upper-case sequence of one chain.

    Asks for a chain id, removes every other chain from every model of the
    in-memory structure, builds the peptides of what is left and prints
    their concatenated sequence.  Reads the module-level ``pdb_name``.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    what_chain = raw_input('For what chain do you want the sequence : ')
    for model in structure:
        # Snapshot the chains: detaching while iterating the live child list
        # skips the following chain, so unwanted chains used to survive.
        for chain in list(model):
            if chain.id != what_chain:
                model.detach_child(chain.id)

    ppb = PPBuilder()
    seq = ''.join(
        [str(pp.get_sequence()) for pp in ppb.build_peptides(structure)]
    ).upper()
    print(seq)
def Init(self, parent, bigPanel, colorList):
    """Store the panel and colour list, run the sub-initialisers, create a parser.

    ``bigPanel`` and ``colorList`` are kept on the instance; ``parent`` is
    not used in this method.
    """
    self.bigPanel = bigPanel
    # Size as reported by the panel's own GetSize().
    self.bPSize = self.bigPanel.GetSize()
    self.cL = colorList
    self.tabButtons = []
    # These run after the attributes above are set and before the two
    # counters below are reset to zero.  NOTE(review): presumably they are
    # defined elsewhere on this class and may rely on that order - confirm
    # before reordering.
    self.FrameInit()
    self.CoverInit()
    self.TabButtonInit()
    self.timesCalled = 0
    self.ssMeth = 0
    # Permissive PDB parser stored on the instance.
    self.p = PDBParser(PERMISSIVE=1)
def GetExec():
    """Index the ``.pdb`` files of the working directory, caching by pickle.

    Reads the previous timestamp from ``lastChecked.txt`` and replaces it
    with the current time.  For every ``.pdb`` entry, a summary
    ``[name, model count, file name]`` is pickled next to it (through
    ``mP.spickle``) when no pickle exists or the file is newer than the
    previous timestamp; otherwise the pickle is loaded (``mP.opickle``).
    ``IOError`` raised while handling an entry is printed and the entry is
    skipped.  Nothing is returned in the code shown here.
    """
    entries = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    pdb_parser = PDBParser(PERMISSIVE=1)

    stamp_reader = open('lastChecked.txt', 'r')
    previous_check = float(stamp_reader.readline())
    stamp_reader.close()
    stamp_writer = open('lastChecked.txt', 'w')
    stamp_writer.write(str(time.time()))
    stamp_writer.close()

    slot = 0
    for entry in entries:
        try:
            (name, ext) = os.path.splitext(entry)
            if ext == ".pdb":
                pickle_path = name + ".pickle"
                newList.append([entry, os.getcwd()])
                if (not os.path.isfile(pickle_path)
                        or float(fmt.filemtime(entry)) > previous_check):
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        record = pdb_parser.get_structure(name, entry)
                        models = record.get_list()
                        listdata[slot] = (str(name), len(models),
                                          os.getcwd()+'/'+str(name) + str(ext))
                        summary = [str(name), len(models),
                                   str(name) + str(ext)]
                        mP.spickle(pickle_path, summary)
                else:
                    summary = mP.opickle(pickle_path)
                    listdata[slot] = (str(summary[0]), summary[1],
                                      summary[2])
                slot += 1
        except IOError as e:
            print(e)
def assembleChain():
    """Interactively merge two chains and save ``<name>_assemble.pdb``.

    Asks for two chain ids.  In every model, the chain carrying the second
    id is given the first id, assigned once per residue through the
    residue's parent.  Reads the module-level ``pdb_name``; ``<name>`` is
    the part of it before the first dot.
    """
    base_name = pdb_name.partition('.')[0]
    pdb_parser = PDBParser()
    structure = pdb_parser.get_structure(base_name, pdb_name)
    # Header and trailer are requested but their values are not used.
    pdb_parser.get_header()
    pdb_parser.get_trailer()

    first_id = raw_input('What is the 1st chain you want to assemble : ')
    second_id = raw_input('What is the 2nd chain you want to assemble : ')
    for model in structure:
        for chain in model:
            if chain.id != first_id and chain.id == second_id:
                for residue in chain:
                    residue.get_parent().id = first_id

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(base_name + '_assemble.pdb')
def get_interaction_pairs_from_input(options):
    """Build the interaction data for the PDB files under ``options.infile``.

    Files with exactly two chains are used as they are; any other file is
    expanded through ``get_all_interaction_pairs``.  Each resulting
    structure is filed under the sorted pair of ids of the chains that its
    first two chains are similar to.

    :param options: object providing ``infile`` (input directory) and
        ``verbose``; it is also handed to the helper functions.
    :return: list ``[interaction_dict, id_dict, similar_sequences,
        seq_dict]``.
    """
    pdb_parser = PDBParser(PERMISSIVE=1)
    structure_list = []

    # Save the pdb files in separate structures
    for pdb_path in get_pdb_from_directory(options.infile):
        structure = pdb_parser.get_structure(get_structure_name(pdb_path),
                                             pdb_path)
        if len(list(structure.get_chains())) == 2:
            structure_list.append(structure)
        else:
            structure_list += get_all_interaction_pairs(options, pdb_path,
                                                        False)[0]

    id_dict = get_id_dict(structure_list)

    # Add all the chains to the seq_dict
    chain_list = []
    for structure in structure_list:
        chain_list.extend(structure.get_chains())
    seq_dict = get_seq_dict(chain_list)
    similar_sequences = get_similar_sequences(chain_list, seq_dict)

    # Add all the interactions to a dictionary
    interaction_dict = {}
    for structure in structure_list:
        chains = list(structure.get_chains())
        first_id = id_dict[similar_sequences[chains[0]]]
        second_id = id_dict[similar_sequences[chains[1]]]
        nr_interaction = tuple(sorted([first_id, second_id]))
        interaction_dict.setdefault(nr_interaction, []).append(chains)

    clean_interaction_dict(interaction_dict, similar_sequences, options)

    if options.verbose:
        print('\n')
        counter = 0
        for pair, interactions in interaction_dict.items():
            print(pair)
            for interaction in interactions:
                print("\t%s" % interaction)
                counter += 1
        print(counter)

    # TODO: remove unused chains from similar sequences
    return [interaction_dict, id_dict, similar_sequences, seq_dict]
def main():
    """Detect polar contacts in a PDB file, print and plot their energies.

    Steps, in order:
      1. parse the command line and echo the settings;
      2. load the VDW parameter set and the residue library;
      3. parse the PDB file and keep the first model;
      4. collect the polar atoms and search for pairs closer than HBLNK,
         dropping pairs in the same residue, pairs closer than COVLNK,
         pairs in residues numbered one apart and, with --nowats, waters;
      5. print the contacts and the per-residue-pair electrostatic and VDW
         energies, then the five pairs with the lowest total;
      6. draw a bar chart and a main/side-chain scatter plot;
      7. print an energy term for pairs found in a fixed residue list.
    """
    parser = argparse.ArgumentParser(prog='polarContacts', description='Polar contacts detector')
    parser.add_argument('--backonly', action='store_true', dest='backonly', help='Restrict to backbone')
    parser.add_argument('--nowats', action='store_true', dest='nowats', help='Exclude water molecules')
    parser.add_argument('--diel', type=float, action='store', dest='diel', default=1.0, help='Relative dielectric constant')
    parser.add_argument('--vdw', action='store', dest='vdwprm', help='VDW Paramters file')
    parser.add_argument('--rlib', action='store', dest='reslib', help='AminoAcid library')
    parser.add_argument('pdb_path')
    args = parser.parse_args()
    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)
    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    diel = args.diel
    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))
    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))
    # NOTE(review): this check runs only after both libraries were loaded.
    if not pdb_path:
        parser.print_help()
        sys.exit(2)
    # From here on ``parser`` is the PDB parser, not the argument parser.
    parser = PDBParser(PERMISSIVE=1)
    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)
    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    # Using Model 0 any way
    st = st[0]
    # Making a list of polar atoms
    polats = []
    if backonly:
        selected_atoms = backbone_polars
    else:
        selected_atoms = all_polars
    for at in st.get_atoms():
        if at.id in selected_atoms:
            polats.append(at)
    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
                or at2.get_parent().get_resname() in waternames:
                continue
        # atom1 = Atom(at1,1,aaLib,vdwParams)
        # atom2 = Atom(at2,1,aaLib,vdwParams)
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])
    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id,
            hb[0] - hb[1]))
    print()
    print("Residue interactions")
    # Making list of residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])
    l = []
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        # Sum the pairwise terms over every atom of both residues.
        eint = 0.
        evdw = 0.
        for at1 in rpair[0].get_atoms():
            resid1 = rpair[0].get_resname()
            atid1 = at1.id
            atparam1 = aaLib.getParams(resid1, atid1)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]
            for at2 in rpair[1].get_atoms():
                resid2 = rpair[1].get_resname()
                atid2 = at2.id
                atparam2 = aaLib.getParams(resid2, atid2)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / (
                    at1 - at2)
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                evdw = evdw + 4 * eps * ((sig / (at1 - at2))**12 -
                                         (sig / (at1 - at2))**6)
        print(resid1, rpair[0].id[1], resid2, rpair[1].id[1], eint, evdw,
              eint + evdw)
        l.append([
            resid1, rpair[0].id[1], resid2, rpair[1].id[1], eint, evdw,
            eint + evdw
        ])
    # here we have the code for finding the most stable contacts and plotting each energy component
    # (global, electrostatic and vdw) with respect to the residue number involved in these contacts
    print("Five most stable contacts")
    stable = []
    for index, element in enumerate(sorted(l, key=lambda i: i[6])):
        if index < 5:
            stable.append(element)
            print(element)
    n_groups = 5
    # NOTE(review): the plotted energies and tick labels below are literal
    # constants, not taken from ``stable`` - confirm this is intended.
    eint = (-96.879048334060371, -89.262401988309293, -63.650369322307412,
            -51.488465980661772, -50.345308360049728)
    vdw = (-1.0577685827505385, -1.5931226867662258, 1.5038605656892994,
           -2.9601475910966375, -2.0108017155800604)
    etot = (-97.936816916810912, -90.855524675075515, -62.146508756618111,
            -54.448613571758408, -52.356110075629786)
    fig, ax = plt.subplots()
    index = np.arange(n_groups)
    bar_width = 0.15
    opacity = 0.5
    inf1 = plt.bar(index, eint, bar_width, alpha=opacity, color='b', label='electrostatic energies')
    inf2 = plt.bar(index + bar_width, vdw, bar_width, alpha=opacity, color='g', label='van der waals energies')
    # NOTE(review): same x offset as the previous series, so these bars are
    # drawn over the VDW bars - confirm whether a further shift was meant.
    inf3 = plt.bar(index + bar_width, etot, bar_width, alpha=opacity, color='r', label='total energies')
    plt.title('Energies for pair of residues')
    plt.xlabel('Contacts')
    plt.ylabel('Energy')
    plt.xticks(index + bar_width, ('LYS-ASP', 'LYS-GLU', 'GLU-LYS', 'GLU-ARG', 'ASP-ARG'))
    plt.legend()
    plt.tight_layout()
    plt.show()
    both_main_x = []
    both_main_y = []
    both_side_x = []
    both_side_y = []
    main_side_x = []
    main_side_y = []
    side_main_x = []
    side_main_y = []
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        # Classify each atom of the contact as backbone or side chain.
        if hb[0].id in backbone_polars:
            where0 = 'main'
        else:
            where0 = 'side'
        if hb[1].id in backbone_polars:
            where1 = 'main'
        else:
            where1 = 'side'
        label = where0 + ':' + where1
        # label[0] and label[5] are the first letters of the two parts
        # ('m' or 's'), since both parts are four characters long.
        if label[0] == label[5] and label[0] == 'm':
            value = 1
            both_main_x.append(hb[0].get_parent().id[1])
            both_main_y.append(hb[1].get_parent().id[1])
        elif label[0] == label[5] and label[0] == 's':
            value = 2
            both_side_x.append(hb[0].get_parent().id[1])
            both_side_y.append(hb[1].get_parent().id[1])
        elif label[0] != label[5] and label[0] == 'm':
            value = 3
            main_side_x.append(hb[0].get_parent().id[1])
            main_side_y.append(hb[1].get_parent().id[1])
        elif label[0] != label[5] and label[0] == 's':
            value = 4
            side_main_x.append(hb[0].get_parent().id[1])
            side_main_y.append(hb[1].get_parent().id[1])
        linking = [label, value, hb[0].id, hb[1].id, hb[0] - hb[1]]
        print('{:14}{:14}{:14}{:14}{:6.3f}'.format(label, value, hb[0].id, hb[1].id, hb[0] - hb[1]))
    plt.figure(figsize=(10, 8))
    plt.scatter(both_main_x, both_main_y, c='red', label='both_main')
    plt.scatter(both_side_x, both_side_y, c='green', label='both_side')
    plt.scatter(main_side_x, main_side_y, c='blue', label='main_side')
    plt.scatter(side_main_x, side_main_y, c='yellow', label='side_main')
    plt.title('Interaction')
    plt.xlabel('Residue1 number')
    plt.ylabel('Residue2 number')
    plt.legend(loc='upper right')
    plt.show()
    # The surface residues are the ones with an Area<5 (http://cib.cf.ocha.ac.jp/bitool/ASA/display.php?id=1513152459.2996)
    surface_res = [['ILE', 3], ['VAL', 5], ['ILE', 23], ['VAL', 26],
                   ['ILE', 30], ['GLN', 41], ['LEU', 43], ['LEU', 56],
                   ['ILE', 61], ['LEU', 67], ['LEU', 69]]
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        eint = 0.
        # NOTE(review): the loops bind atom1/atom2 and resname1/resname2,
        # but the parameter lookups read at1/at2 and resid1/resid2, which
        # still hold values from the earlier loops - confirm.
        for atom1 in rpair[0].get_atoms():
            resname1 = rpair[0].get_resname()
            atid1 = at1.id
            atparam1 = aaLib.getParams(resid1, atid1)
            for atom2 in rpair[1].get_atoms():
                resname2 = rpair[1].get_resname()
                atid2 = at2.id
                atparam2 = aaLib.getParams(resid2, atid2)
                for values in surface_res:
                    for values2 in surface_res:
                        if resname1 == values[0] and rpair[0].id[1] == values[
                                1]:
                            if resname2 == values2[0] and rpair[1].id[
                                    1] == values2[1]:
                                eint = eint + 80 * atparam1.charg * atparam2.charg / diel / (
                                    atom1 - atom2)
        # NOTE(review): ``evdw`` is not recomputed in this loop; the value
        # printed is the one left by the energy loop above - confirm.
        if eint != 0:
            print(resid1, rpair[0].id[1], resid2, rpair[1].id[1], eint, evdw,
                  eint + evdw)
def __contains__(self, res): """True if the given residue is in any of the mapped fragments. @type res: L{Residue} """ return (res in self.fd) def __getitem__(self, res): """ @type res: L{Residue} @return: fragment classification @rtype: L{Fragment} """ return self.fd[res] if __name__=="__main__": import sys p = PDBParser() s = p.get_structure("X", sys.argv[1]) m = s[0] fm = FragmentMapper(m, 10, 5, "levitt_data") for r in Selection.unfold_entities(m, "R"): print("%s:" % r) if r in fm: print(fm[r])
logger.error(str(err)) raise SystemExit else: logger.warning( "PDB structure already exists ({0}), no need to download it again" .format(input_pdb_file)) else: pdb_code = filename input_pdb_file = args.pdb_file_name if not os.path.exists(input_pdb_file): logger.error("PDB structure file {0} not found".format(input_pdb_file)) raise SystemExit # Check if chain belongs to this PDB pdb_parser = PDBParser(PERMISSIVE=True, QUIET=True) structure = pdb_parser.get_structure(filename, input_pdb_file) chain_ids = [chain.id for chain in structure.get_chains()] chain_id = args.chain_id.upper() if len(chain_id) > 1: logger.error("Wrong chain id {0}".format(chain_id)) raise SystemExit if chain_id not in chain_ids: logger.error("Chain {0} provided not in available chains: {1}".format( chain_id, str(chain_ids))) raise SystemExit # Save only the given chain and discard residues with alternative positions io = PDBIO() current_pdb_file = "{0}{1}_{2}.pdb".format(output_dir, pdb_code, chain_id) for chain in structure.get_chains():
return True return False def _test_dist(self, c, n): """Return 1 if distance between atoms<radius (PRIVATE).""" if (c - n) < self.radius: return 1 else: return 0 if __name__ == "__main__": import sys from Bio.PDB.PDBParser import PDBParser p = PDBParser(PERMISSIVE=True) s = p.get_structure("scr", sys.argv[1]) ppb = PPBuilder() print("C-N") for pp in ppb.build_peptides(s): print(pp.get_sequence()) for pp in ppb.build_peptides(s[0]): print(pp.get_sequence()) for pp in ppb.build_peptides(s[0]["A"]): print(pp.get_sequence()) for pp in ppb.build_peptides(s): for phi, psi in pp.get_phi_psi_list():
# [email protected]
from Bio.PDB import PDBIO
from Bio.PDB.PDBParser import PDBParser
from optparse import OptionParser

# Command line: -f/--pdbfile names the structure file to read.
option_parser = OptionParser()
option_parser.add_option(
    "-f",
    "--pdbfile",
    default=None,
    type="string",
    help="pdb structure file for additional 3-coord cartesian per residue")
(options, args) = option_parser.parse_args()
if options.pdbfile is None:
    # Stop with a usage message instead of failing inside the PDB parser.
    option_parser.error("a PDB structure file is required (-f/--pdbfile)")

parser = PDBParser()
structure = parser.get_structure("mystruct", options.pdbfile)
model = structure[0]

# average_bfactors maps a residue number to the sum over all chains of
# (CA B-factor / number of chains in the model).
# Chain "A" seeds the table so each of its residue numbers has an entry,
# even when no chain has a CA atom at that number.
average_bfactors = {}
for residue in model["A"]:
    average_bfactors[residue.get_id()[1]] = 0.0

# Loop-invariant divisor: the number of chains in the first model.
n_chains = float(len(model.get_list()))
for chain in model.get_list():
    for residue in chain.get_list():
        if residue.has_id("CA"):
            ca = residue["CA"]
            res_num = residue.get_id()[1]
            # .get() keeps residue numbers that are absent from chain "A"
            # from raising KeyError.
            average_bfactors[res_num] = (
                average_bfactors.get(res_num, 0.0)
                + float(ca.get_bfactor()) / n_chains)
from Bio.PDB.PDBIO import PDBIO from Bio.PDB.PDBIO import Select from Bio.PDB.PDBParser import PDBParser from Bio.PDB.mmtf import MMTFParser from Bio.PDB.PDBExceptions import PDBConstructionWarning import os.path as op import logging import warnings import ssbio.utils from ssbio.biopython.bp_mmcifparser import MMCIFParserFix log = logging.getLogger(__name__) cifp = MMCIFParserFix(QUIET=True) pdbp = PDBParser(PERMISSIVE=True, QUIET=True) mmtfp = MMTFParser() def as_protein(structure, filter_residues=True): """ Exposes methods in the Bio.Struct.Protein module. Parameters: - filter_residues boolean; removes non-aa residues through Bio.PDB.Polypeptide is_aa function [Default: True] Returns a new structure object. """ from ssbio.biopython.Bio.Struct.Protein import Protein return Protein.from_structure(structure, filter_residues) class StructureIO(PDBIO):
# To add a new cell, type '# %%' # To add a new markdown cell, type '# %% [markdown]' # %% from Bio.PDB.PDBParser import PDBParser parser = PDBParser() structure = parser.get_structure("test", "1osv_ligand_backup.pdb") model = structure[0] chain = model["A"] # %% for chain in model: print(chain) # %% from Bio.PDB import PDBParser, PDBIO io = PDBIO() pdb = PDBParser().get_structure("test", "1osv_ligand_backup.pdb") # %% import os io = PDBIO() end = "_ligand.pdb" directory = "./ligands/" for filename in os.listdir(directory): if filename.endswith(end): # IF the file ends with the generic ending, fileid = filename # duplicate the fileid for a shortened title fileid = fileid.replace(end, '') # remove the end
b2 = len(sequence_b_list2) b3 = len(sequence_b_list3) b4 = len(sequence_b_list4) num1 = b1 + b2 + b3 + b4 ratio1b = float('%.6f' % (b1 / num1)) ratio2b = float('%.6f' % (b2 / num1)) ratio3b = float('%.6f' % (b3 / num1)) ratio4b = float('%.6f' % (b4 / num1)) string_b = str(ratio1b) + ',' + str(ratio2b) + ',' + str( ratio3b) + ',' + str(ratio4b) + ',' + '0' return string_b for file in PDBList: try: parser = PDBParser(PERMISSIVE=1) structure_id = os.path.splitext(file)[0] filename = file structure1 = parser.get_structure(structure_id, filename) model = structure1[0] chain_A = model["H"] chain_B = model["L"] surface_list_a, surface_list_b = surface_list(file) string_aa = feature_extraction_sequence2(surface_list_a) string_bb = feature_extraction_sequence_b2(surface_list_b) with open("result_surface_sequence2.txt", "a") as f: f.write(string_aa + "\n") f.write(string_bb + "\n")
def read_pdb(parametersobject):
    """Recentre a two-chain PDB, round-trip it through the AWSEM tools and
    record chain metadata.

    Reads ``Initial_dimer_pdb``, ``Path_to_awsem`` and ``Python2_command``
    from ``parametersobject.parameterdic``. The first model must contain
    exactly two chains. All atoms of that model are shifted by the ``COM`` of
    the chain with the larger ``len()`` and saved as ``<name>_recentred.pdb``.
    The working directories are then created, the external AWSEM scripts are
    run, and a ``.lammpstrj`` snapshot is written from the generated ``.data``
    file. Temporary files are removed, the chain information is stored in
    ``parametersobject.deriveddic`` and ``save_derived()`` is called.

    ``sys.stdout`` is redirected to the null device while the work runs and
    is always restored, including on errors and on exit.

    :param parametersobject: object exposing ``parameterdic``, ``deriveddic``
        and ``save_derived()``.
    :raises SystemExit: (status 1) if the first model does not contain exactly
        two chains; the reason is written to stderr.
    :raises ValueError: if no atom line with ``2`` in its second column is
        found in the ``.data`` file, so the last id of the first chain
        cannot be determined.
    """
    pdbname = parametersobject.parameterdic['Initial_dimer_pdb']
    Path_to_awsem = parametersobject.parameterdic['Path_to_awsem']
    Python2_command = parametersobject.parameterdic['Python2_command']
    name = pdbname[:-4]
    recentred = name + "_recentred"

    actualstdout = sys.stdout
    devnull = open(os.devnull, 'w')
    sys.stdout = devnull
    try:
        structure = PDBParser(PERMISSIVE=1).get_structure('init', pdbname)
        # Diagnostics go to stderr: stdout is silenced at this point, so a
        # plain print() would never reach the user.
        if len(structure) > 1:
            sys.stderr.write(
                "More than one model found in PDB. Using model 0 only.\n")
        if len(structure[0]) > 2:
            sys.exit("More than two chains found in PDB. Exiting.")
        elif len(structure[0]) < 2:
            sys.exit("Less than two chains found in PDB. Exiting.")

        chain = structure[0].get_list()
        chainnames = [c.id for c in chain]
        if len(chain[0]) < len(chain[1]):
            bigger = 1
            bigid = chain[1].id
            smallid = chain[0].id
            first_chain_is_bigger = False
        else:
            bigger = 0
            bigid = chain[0].id
            smallid = chain[1].id
            first_chain_is_bigger = True

        # chain[bigger] is the bigger chain. Next steps:
        #   recentre -> .coord -> .data -> .lammpstrj -> back to pdb.
        # The round trip is needed because the structure must match exactly
        # the PDB that the AWSEM tooling itself produces for the simulation.
        centre = COM(chain[bigger])
        for atom in structure[0].get_atoms():
            atom.set_coord(atom.coord - centre._ar)
        w = PDBIO()
        w.set_structure(structure)
        w.save(recentred + '.pdb')

        cwd = os.getcwd()
        directorynames = [
            'md_input', 'md_output', 'analysis', 'results_main',
            'results_individual', 'pdb_trajectories'
        ]
        for d in directorynames:
            # Directories left over from a previous run are fine.
            os.makedirs(os.path.normpath(cwd + '/' + d), exist_ok=True)

        os.system(Python2_command + " " + Path_to_awsem +
                  "/create_project_tools/PDBToCoordinates.py " + recentred +
                  " " + recentred + ".coord")
        os.system(Python2_command + " " + Path_to_awsem +
                  "/create_project_tools/CoordinatesToWorkLammpsDataFile.py " +
                  recentred + ".coord " + recentred + ".data -b")
        os.system(Python2_command + " " + Path_to_awsem +
                  "/frag_mem_tools/Pdb2Gro.py " + recentred + " " +
                  " md_input/chain1.gro " + chain[0].id)
        os.system(Python2_command + " " + Path_to_awsem +
                  "/frag_mem_tools/Pdb2Gro.py " + recentred + " " +
                  " md_input/chain2.gro " + chain[1].id)

        first_chain_max_id = None
        firstchain = True
        with open(recentred + ".data", "r") as f_data, \
                open(recentred + ".lammpstrj", "w+") as f_lammps:
            f_lammps.write("ITEM: TIMESTEP\n0\nITEM: BOX BOUNDS ff ff ff\n")
            for _ in range(3):
                f_lammps.write(
                    "-2.0000000000000000e+02 2.0000000000000000e+02\n")
            f_lammps.write("ITEM: ATOMS id type xs ys zs\n")
            for linecount, line in enumerate(f_data):
                # The first 28 lines of the .data file are skipped; the atom
                # records end at the first blank line after them.
                if linecount < 28:
                    continue
                linesplit = line.strip().split()
                if len(linesplit) < 1:
                    break
                # Map coordinates from [-200, 200] to the scaled [0, 1] range.
                x = (float(linesplit[5]) + 200) / 400
                y = (float(linesplit[6]) + 200) / 400
                z = (float(linesplit[7]) + 200) / 400
                f_lammps.write(linesplit[0] + ' ' + linesplit[3])
                f_lammps.write(' %.9f %.9f %.9f\n' % (x, y, z))
                # The first line whose second column is '2' marks the start
                # of the second chain; the id just before it ends chain one.
                if firstchain and linesplit[1] == '2':
                    firstchain = False
                    first_chain_max_id = int(linesplit[0]) - 1

        if first_chain_max_id is None:
            raise ValueError(
                "could not find the start of the second chain in " +
                recentred + ".data")

        # Every other tool path is built as Path_to_awsem + "/...";
        # normpath collapses a doubled separator if the setting ends in "/".
        location = os.path.normpath(
            Path_to_awsem +
            "/results_analysis_tools/BuildAllAtomsFromLammps_seq_multichain.py "
            + recentred + ".lammpstrj")
        os.system(Python2_command + " " + location + " refpdb " + recentred +
                  ".seq")
    finally:
        sys.stdout = actualstdout
        devnull.close()

    # Remove the intermediate files.
    os.remove("refpdb.psf")
    os.remove(recentred + ".lammpstrj")
    os.remove(recentred + ".data")
    os.remove(recentred + ".coord")

    d = parametersobject.deriveddic
    d['first_chain'] = chainnames[0]
    d['second_chain'] = chainnames[1]
    d['first_chain_length'] = len(chain[0])
    d['second_chain_length'] = len(chain[1])
    d['bigger_chain'] = bigid
    d['smaller_chain'] = smallid
    d['first_chain_max_id'] = first_chain_max_id
    d['first_chain_is_bigger'] = first_chain_is_bigger
    parametersobject.save_derived()
pylab.xlim(0, c_info_base[-1]) pylab.ylim(0, c_info_uniq[-1]) rcParams['figure.figsize'] = 5, 10 pylab.savefig(out_id + '.png', dpi=300) pylab.close() def get_file(filein): string = '' f_pairs = open(filein, "r") for line in f_pairs: string = string + line return string parser = PDBParser(PERMISSIVE=1, QUIET=1) if __name__ == "__main__": parser2 = argparse.ArgumentParser() parser2.add_argument('-i', '--input', dest="infile", action="store", nargs="?", default='.', help="Input FASTA file", required=True) parser2.add_argument( '-o', '--output', dest="outfile",
if len(sys.argv)<=3: print "\nExtractGoModelCGCoeffs.py Input_file PDB_id snapshot\n" print "-s\tSplit into files for each chain" exit() filename = sys.argv[1] pdb_id = sys.argv[2] if pdb_id[-4:].lower()==".pdb": pdb_file = pdb_id else: pdb_file = pdb_id + ".pdb" frame = int(sys.argv[3]) p = PDBParser(PERMISSIVE=1) s = p.get_structure(pdb_id, pdb_file) chains = s[0].get_list() chain = chains[0] for res in chain: is_regular_res = res.has_id('CA') and res.has_id('O') res_id = res.get_id()[0] if (res_id==' ' or res_id=='H_MSE' or res_id=='H_M3L') and is_regular_res: ca_atoms_pdb.append(res['CA'].get_coord()) for i in range( 0, len(ca_atoms_pdb) ): sigmaN.append([]) for j in range( i+4, len(ca_atoms_pdb) ): if abs(j-i)<3:
fp.write("TER\n") if model_flag and model_residues_written: fp.write("ENDMDL\n") if write_end: fp.write('END\n') if close_file: fp.close() if __name__ == "__main__": from Bio.PDB.PDBParser import PDBParser import sys p = PDBParser(PERMISSIVE=1) s = p.get_structure("test", sys.argv[1]) io = PDBIO() io.set_structure(s) io.save("out1.pdb") fp = open("out2.pdb", "w") s1 = p.get_structure("test1", sys.argv[1]) s2 = p.get_structure("test2", sys.argv[2]) io = PDBIO(1) io.set_structure(s1) io.save(fp) io.set_structure(s2) io.save(fp, write_end=1)
class Rebuild(unittest.TestCase):
    """Read PDB and mmCIF structures, convert to/from internal coordinates."""

    # Parsers and structures are class attributes, so every file below is
    # parsed once when the class body is executed and then shared by all
    # tests. Paths are relative ("PDB/..."), i.e. resolved against the
    # current working directory.
    PDB_parser = PDBParser(PERMISSIVE=True, QUIET=True)
    CIF_parser = MMCIFParser(QUIET=True)
    pdb_1LCD = PDB_parser.get_structure("1LCD", "PDB/1LCD.pdb")
    pdb_2XHE = PDB_parser.get_structure("2XHE", "PDB/2XHE.pdb")
    cif_3JQH = CIF_parser.get_structure("3JQH", "PDB/3JQH.cif")
    cif_4CUP = CIF_parser.get_structure("4CUP", "PDB/4CUP.cif")

    def test_rebuild_multichain_missing(self):
        """Convert multichain missing atom protein to internal coordinates and back."""
        # 2XHE has regions of missing chain, last residue has only N
        r = structure_rebuild_test(self.pdb_2XHE, False)
        # r is the report returned by structure_rebuild_test; the values
        # below are the expected counts for this particular file.
        self.assertEqual(r["residues"], 787)
        self.assertEqual(r["rCount"], 835)
        self.assertEqual(r["rMatchCount"], 835)
        self.assertEqual(r["aCount"], 6267)
        self.assertEqual(r["disAtmCount"], 0)
        self.assertEqual(r["aCoordMatchCount"], 6267)
        self.assertEqual(len(r["chains"]), 2)
        self.assertTrue(r["pass"])

    def test_rebuild_disordered_atoms_residues(self):
        """Convert disordered protein to internal coordinates and back."""
        # 3jqh has both disordered residues
        # and disordered atoms in ordered residues
        # Warnings raised during the rebuild are recorded rather than
        # emitted; the recorded list `w` is not inspected.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always", PDBConstructionWarning)
            r = structure_rebuild_test(self.cif_3JQH, False)
        # print(r)
        self.assertEqual(r["residues"], 26)
        self.assertEqual(r["rCount"], 47)
        self.assertEqual(r["rMatchCount"], 47)
        self.assertEqual(r["aCount"], 217)
        self.assertEqual(r["disAtmCount"], 50)
        self.assertEqual(r["aCoordMatchCount"], 217)
        self.assertEqual(len(r["chains"]), 1)
        self.assertTrue(r["pass"])

    def test_model_change_internal_coords(self):
        """Get model internal coords, modify psi and chi1 values and check.

        The modified model is written out as PDB text, parsed again, and the
        angles recomputed from the re-read atoms are compared with the values
        that were set (tau is changed and checked as well).
        """
        # Select the model with serial number 2; `mdl` keeps that value after
        # the break (or the last model if none matches).
        for mdl in self.pdb_1LCD:
            if mdl.serial_num == 2:
                break
        mdl.atom_to_internal_coordinates()
        # other tests show can build with arbitrary internal coords
        # build here so changes below trigger more complicated
        # xAtoms_needs_update mask arrays
        mdl.internal_to_atom_coordinates()
        # New angle values per residue (keyed by str(residue)) and the number
        # of residues changed for each angle type.
        nvt = {}
        nvc1 = {}
        nvpsi = {}
        tcount = 0
        c1count = 0
        psicount = 0
        for r in mdl.get_residues():
            ric = r.internal_coord
            if ric:
                # hedra change
                tau = ric.get_angle("tau")
                if ric.rprev != [] and tau is not None:
                    tcount += 1
                    nv = tau + 0.5
                    ric.set_angle("tau", nv)
                    nvt[str(r)] = nv
                # sidechain dihedron change
                chi1 = ric.get_angle("chi1")
                if chi1 is not None:
                    c1count += 1
                    nv = chi1 + 90
                    # wrap back into the (-180, 180] range
                    if nv > 180.0:
                        nv -= 360.0
                    ric.set_angle("chi1", nv)
                    nvc1[str(r)] = nv
                # backbone dihedron change
                psi = ric.get_angle("psi")
                if psi is not None:
                    psicount += 1
                    nv = psi - 90
                    # wrap back into the [-180, 180) range
                    if nv < -180.0:
                        nv += 360.0
                    ric.set_angle("psi", nv)
                    nvpsi[str(r)] = nv
        mdl.internal_to_atom_coordinates()
        # Round-trip through PDB text held in memory.
        sf = StringIO()
        write_PDB(self.pdb_1LCD, sf)
        sf.seek(0)
        new_1LCD = self.PDB_parser.get_structure("1LCD", sf)
        for mdl in new_1LCD:
            if mdl.serial_num == 2:
                break
        mdl.atom_to_internal_coordinates()
        ttcount = 0
        c1tcount = 0
        psitcount = 0
        for r in mdl.get_residues():
            ric = r.internal_coord
            if ric:
                tau = ric.get_angle("tau")
                if ric.rprev != [] and tau is not None:
                    ttcount += 1
                    self.assertAlmostEqual(tau, nvt[str(r)], places=1)
                chi1 = ric.get_angle("chi1")
                if chi1 is not None:
                    c1tcount += 1
                    self.assertAlmostEqual(chi1, nvc1[str(r)], places=1)
                psi = ric.get_angle("psi")
                if psi is not None:
                    psitcount += 1
                    self.assertAlmostEqual(psi, nvpsi[str(r)], places=1)
        # The same residues must have been visited before and after the
        # round trip, and each angle type must have been exercised.
        self.assertEqual(tcount, ttcount)
        self.assertEqual(c1count, c1tcount)
        self.assertEqual(psicount, psitcount)
        self.assertTrue(ttcount > 0)
        self.assertTrue(c1count > 0)
        self.assertTrue(psicount > 0)

    def test_write_SCAD(self):
        """Check SCAD output plus MaxPeptideBond and Gly CB.

        SCAD tests: scaling, transform mtx, extra bond created (allBonds)
        """
        sf = StringIO()
        write_SCAD(
            self.cif_4CUP, sf, 10.0, pdbid="4cup", backboneOnly=True, includeCode=False
        )
        sf.seek(0)
        # next_one flags the line that directly follows the '[ 1, "1857M",'
        # marker line; that following line is the one checked as a transform.
        next_one = False
        with as_handle(sf, mode="r") as handle:
            for aline in handle.readlines():
                if "// (1856_S_CB, 1856_S_CA, 1856_S_C)" in aline:
                    m = re.search(r"\[\s+(\d+\.\d+)\,", aline)
                    if m:
                        # test correctly scaled atom bond length
                        self.assertAlmostEqual(float(m.group(1)), 15.30582, places=3)
                    else:
                        self.fail("scaled atom bond length not found")
                elif '[ 1, "1857M",' in aline:
                    next_one = True
                elif next_one:
                    next_one = False
                    # test last residue transform looks roughly correct
                    # some differences due to sorting issues on different python
                    # versions
                    target = [-12.413, -3.303, 35.771, 1.0]
                    ms = re.findall(  # last column of each row
                        r"\s+(-?\d+\.\d+)\s+\]", aline
                    )
                    if ms:
                        # only the first three entries of target are compared
                        for i in range(0, 3):
                            self.assertAlmostEqual(float(ms[i]), target[i], places=0)
                    else:
                        self.fail("transform not found")
        # The same buffer is reused for the second write: it is rewound but
        # not truncated before write_SCAD writes into it again.
        sf.seek(0)
        IC_Residue.gly_Cbeta = True
        write_SCAD(
            self.pdb_2XHE[0]["A"],
            sf,
            10.0,
            pdbid="2xhe",
            maxPeptideBond=100.0,
            includeCode=False,
        )
        sf.seek(0)
        allBondsPass = False
        maxPeptideBondPass = False
        glyCbetaFound = False
        with as_handle(sf, mode="r") as handle:
            for aline in handle.readlines():
                # test extra bond created in TRP (allBonds is True)
                if '"Cres", 0, 0, 1, 0, StdBond, "W", 24, "CD2CE3CZ3"' in aline:
                    allBondsPass = True
                # test 509_K-561_E long bond created
                if "509_K" in aline and "561_E" in aline:
                    maxPeptideBondPass = True
                if "(21_G_CB, 21_G_CA, 21_G_C)" in aline:
                    glyCbetaFound = True
                    target = [15.33630, 110.17513, 15.13861]
                    ms = re.findall(r"\s+(-?\d+\.\d+)", aline)
                    if ms:
                        for i in range(0, 3):
                            self.assertAlmostEqual(float(ms[i]), target[i], places=0)
                    else:
                        self.fail("Cbeta internal coords not found")
        self.assertTrue(allBondsPass)
        self.assertTrue(glyCbetaFound)
        self.assertTrue(maxPeptideBondPass)
args = parse_cmd.parse_args()

# Echo the effective settings. The VdW parameter file gets its own label
# (it was previously printed under a second "PDB.filename:" heading).
print("PDB.filename:", args.pdb_file.name)
print("Residue Lib.:", args.reslib_file)
print("VdW Params.:", args.vdwprm_file)
print("Distance:", args.cutoff_dist)

# Loading Libraries
# loading residue library from data/aaLib.lib
residue_library = ResiduesDataLib(args.reslib_file)

# loading VdW parameters
ff_params = VdwParamset(args.vdwprm_file)

parser = PDBParser(PERMISSIVE=1)
# Print the file name, not the repr of the open file object.
print('Parsing', args.pdb_file.name)
# load the structure from the PDB file name (taken from the file handle)
st = parser.get_structure('STR', args.pdb_file.name)

# assign data types, and charges from libraries
# We will use the xtra attribute in Bio.PDB.Atom to hold the new data
# Possible errors on N-term and C-Term atoms
# Possible errors on HIS alternative forms
en.add_atom_parameters(st, residue_library, ff_params)

# Calculating surfaces
# The specific PATH to naccess script (in soft) is needed
# ASA goes to .xtra field directly
import optparse import string import numpy as np from Bio.PDB.PDBParser import PDBParser from Bio.PDB.PDBIO import PDBIO option_parser = optparse.OptionParser() option_parser.add_option('--id', type='str', help='id to attach to pdb structure') options, args = option_parser.parse_args() #TODO PDBParser has additional keyword arguments like PERMISSIVE. # We may decide to add options to the option parser for such # keywords, but for now let's not over-engineer. pdb_parser = PDBParser() def _calculate_center_of_mass(structure): total_mass = 0 mx_total = 0 my_total = 0 mz_total = 0 for atom in structure.get_atoms(): coords = atom.coord.tolist() mass = atom.mass total_mass += mass mx_total += coords[0] * mass my_total += coords[1] * mass mz_total += coords[2] * mass return [
def get_interaction_pairs(options):
    """Write one PDB file per unique pairwise chain interaction.

    The structure named by ``options.infile`` is parsed and its neighbouring
    chains are grouped by the pair of "chain types" they belong to (the ids
    of the representative chains returned by ``get_similar_sequences``). The
    grouping is pruned by ``clean_interaction_dict`` and every remaining
    interaction is saved to ``<structure_id>/<structure_id>_<c1><c2>.pdb``.
    The output folder is created if missing; otherwise the regular files
    already in it are deleted first.

    :param options: object providing ``infile`` (path of the PDB file) and
        ``verbose``; it is also handed to ``get_neighbor_chains``
    :return: the structure id, which is also the name of the output folder
    """
    pdb_path = options.infile

    # Load the PDB file into a structure object.
    structure_id = get_structure_name(pdb_path)
    structure = PDBParser(PERMISSIVE=1).get_structure(structure_id, pdb_path)

    neighbor_chains = get_neighbor_chains(structure, options)
    seq_dict = get_seq_dict(structure.get_chains())
    similar_sequences = get_similar_sequences(list(structure.get_chains()),
                                              seq_dict)

    # Group every neighbouring chain pair under the sorted pair of ids of the
    # representative chains of its two members.
    interaction_dict = {}
    for chain1 in neighbor_chains:
        for chain2 in neighbor_chains[chain1]:
            type_pair = tuple(
                sorted((similar_sequences[chain1].get_id(),
                        similar_sequences[chain2].get_id())))
            interaction_dict.setdefault(type_pair, []).append([chain1, chain2])

    clean_interaction_dict(interaction_dict, similar_sequences)

    if options.verbose:
        total = 0
        print('\n')
        for type_pair in interaction_dict:
            print(type_pair)
            for interaction in interaction_dict[type_pair]:
                print("\t%s" % interaction)
                total += 1
        print(total)

    # Start from an empty output folder.
    if os.path.exists(structure_id):
        for entry in os.listdir(structure_id):
            entry_path = os.path.join(structure_id, entry)
            if os.path.isfile(entry_path):
                os.unlink(entry_path)
    else:
        os.makedirs(structure_id)

    writer = PDBIO()
    writer.set_structure(structure)
    for interactions in interaction_dict.values():
        for interaction in interactions:
            first, second = interaction[0], interaction[1]
            writer.save(
                '%s/%s_%s%s.pdb' % (structure_id, structure_id,
                                    first.get_id(), second.get_id()),
                ChainSelect(first, second))
    return structure_id
def _add_flanking_seq_fragments(ddg_data_dict: Dict, dataset: str,
                                pdb_filename: str):
    """Fill the flank and residue-type columns for rows of one PDB entry.

    The sequence of every chain in the first model of ``pdb_filename`` is
    built from the residues whose name is one of the 20 names returned by
    ``index_to_three``. For each row of ``ddg_data_dict[dataset]`` whose
    ``pdbid`` equals the id derived from the file name, the variant string
    (``<wt><number><mt>``) is located in the chain given by ``chainid``. If
    the wild-type letter matches the sequence, ``left_flank``,
    ``right_flank``, ``wt_restype`` and ``mt_restype`` are written in place;
    otherwise the row is reported with a ``WRONG`` message.

    Args:
        ddg_data_dict: mapping from dataset name to a DataFrame with at least
            the columns ``pdbid``, ``variant`` and ``chainid``.
        dataset: key of the DataFrame to update.
        pdb_filename: path of the PDB file; the first four characters of its
            last "/"-separated component, upper-cased, are used as the id.

    Raises:
        KeyError: if a row names a chain that has no entry in the structure.
        ValueError: if the variant's residue number is not in that chain.
    """
    from Bio.PDB.PDBParser import PDBParser

    df = ddg_data_dict[dataset]
    # Create the output columns on first use.
    for column in ("left_flank", "wt_restype", "mt_restype", "right_flank"):
        if column not in df.columns:
            df[column] = np.nan

    pdbid = pdb_filename.split(r"/")[-1][0:4].upper()
    model_first = PDBParser().get_structure(pdbid, pdb_filename)[0]

    # Built once: the accepted three-letter residue names. A set gives
    # constant-time membership tests inside the residue loop.
    standard_resnames = {index_to_three(i) for i in range(20)}

    chain_id_to_pdb_seq = {}
    chain_id_to_pdb_residue_numbers = {}
    for chain in model_first:
        pdb_seq = []
        pdb_residue_numbers = []
        for residue in chain.get_residues():
            resname = residue.resname.strip()
            if resname in standard_resnames:
                pdb_residue_numbers.append(residue.id[1])
                pdb_seq.append(three_to_one(resname))
        chain_id_to_pdb_seq[chain.id] = "".join(pdb_seq)
        chain_id_to_pdb_residue_numbers[chain.id] = pdb_residue_numbers

    for idx, row in df.iterrows():
        if row["pdbid"] != pdbid:
            continue
        variant = row["variant"]
        residue_number = int(variant[1:-1])
        chain_id = row["chainid"]
        pdb_sequence = chain_id_to_pdb_seq[chain_id]
        # Position of the mutated residue within the extracted sequence.
        resid = chain_id_to_pdb_residue_numbers[chain_id].index(
            residue_number)
        if variant[0] == pdb_sequence[resid]:
            df.loc[idx, "left_flank"] = _trim_left_flank(pdb_sequence[:resid])
            df.loc[idx, "right_flank"] = _trim_right_flank(
                pdb_sequence[resid + 1:])
            df.loc[idx, "wt_restype"] = variant[0]
            df.loc[idx, "mt_restype"] = variant[-1]
        else:
            # Wild-type letter disagrees with the structure's sequence.
            print("WRONG", row[["pdbid", "variant"]])
def GenerateMutations(DataFrame, PDB, PATH): """ Purpose: This function returns the mutated pdb protein files from skempi_v2 database (https://life.bsc.es/pid/skempi2/). Both single mutations and multiple comma separated mutations are taken in to account. If there are multiple mutation indices for the same protein, then this will generate multiple pdb files. Parameters ---------- DataFrame: pandas table The pandas table to read_csv PDB: str The string of the pdb file """ try: from Bio.PDB.PDBIO import PDBIO from Bio.PDB.PDBParser import PDBParser from Bio.Data.IUPACData import protein_letters from Bio.SeqUtils.ProtParam import ProteinAnalysis from Bio.PDB.Polypeptide import PPBuilder from Bio.PDB.Polypeptide import standard_aa_names # Standard amino acid names - https://biopython.org/DIST/docs/api/Bio.PDB.Polypeptide-module.html from Bio.PDB.Polypeptide import aa1 # aa1 = 'ACDEFGHIKLMNPQRSTVWY' from Bio.PDB.Polypeptide import aa3 # aa3 = ['ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'GLY', 'HIS', 'ILE',... 
] import tqdm as tqdm # tqdm - useful for estimating computing times for long for loops except ImportError: print("ERROR: Need to check Biopython imports!") # Before running anything, call foldx on the WT to get the optimized structure to mutate title = PDB.split('.') name = title[0] callfoldx(PDB) # Call FoldX on the WT # Path to where the WT PDBs are stored WTArray = [] nameArray = [] for file in os.listdir( PATH ): # List the fxout files in the directory, and store them in the array if file.endswith(".pdb"): FileLocation = os.path.join(PATH, file) WTArray.append(FileLocation) nameArray.append(file) # Subprocessing block for WT subprocess.Popen("mkdir {}".format(name), shell=True) # Make directory subprocess.Popen("mv OP_{}.fxout {}/.".format(name, name), shell=True) # Move optmiized fxout file to directory subprocess.Popen( "mv Optimized_{}.pdb {}".format(name, PDB), shell=True ) # Rename file from Optimized_PDB.pdb to the same name as the original file to make our lives easier MutationSpecies = [] # List to store the names of the mutated speices AminoAcidListDict = { } # Dictionary to assign alpabetical letters to amino acids for index, code in enumerate(standard_aa_names): AminoAcidListDict[aa1[index]] = aa3[ index] # Building the mutation dictionary for each code parser = PDBParser(PERMISSIVE=1) # Standard PDB parser PDBList = set() for pdb in DataFrame['#Pdb']: pdbname = pdb.split('_')[0] string = "{}.pdb".format(pdbname) PDBList.add(string) if PDB not in PDBList: raise Exception( "The PDB is not in the SKEMPI list" ) # Not in the PDB list we expect - i.e. from the SKEMPI list # Search for PDB mutations that contain the PDB string - e.g. 
the 1CSE mutations will have the format 1CSE_E_I # where it indicates the mutations were made in the 1CSE E and I chains MutationList = DataFrame.loc[(DataFrame['NAME'] == PDB.split('.')[0] )] # This should get the PDB mutations MutationList = MutationList.reset_index() print(MutationList) # Make a dictionary (hash map) with the mutation name and the residue lists to change for index, entry in MutationList.iterrows(): structure = parser.get_structure(str(title[0]), PDB) # reset structure each time model = structure[0] # Switch back to the unchanged one for mut in entry['MutCleanSplit']: initAA, chain, loc, mutAA = re.findall('(\d+|.)', mut) # Check we are reading the right residue and index assert (model[chain][int(loc)].resname == AminoAcidListDict[str( initAA)] ) # This will check that the model is the unmutated pdb print("Mutating {} on index {} of chain {} to {}".format( AminoAcidListDict[str(initAA)], chain, loc, AminoAcidListDict[str(mutAA)])) model[chain][int(loc)].resname = AminoAcidListDict[str( mutAA )] # This command replaces the nonmutated species into the mutated one assert ( model[chain][int(loc)].resname == AminoAcidListDict[str(mutAA)] ) # This will check that the mutation was successful mutanttotalstring = '_'.join(entry['MutCleanSplit']) mutatedname = "{}_{}_{}.pdb".format(entry['#Pdb'], mutanttotalstring, index) MutationSpecies.append(mutatedname) io = PDBIO(structure) io.set_structure(model) io.save( mutatedname ) # This should print out the name of protein, the mutaton list, and the index on the pandas file print("Produced new mutation PDB file {}".format( mutatedname)) # Printing out sign to say the pdb was produced # Call foldx on the mutatied species print(" -----------------------------------------------") print("The following mutant species are to be optimized") print(" -----------------------------------------------") for mutant in MutationSpecies: print("PDB file: {}".format(mutant)) ANS = [] for species in MutationSpecies: 
callfoldx(species) # Call FoldX on each mutated species subprocess.Popen("mv {} {}/.".format(species, name), shell=True) subprocess.Popen("mv OP_{}.fxout {}/.".format( species.split(".")[0], name), shell=True) subprocess.Popen("mv Optimized_{}.pdb {}/.".format( species.split(".")[0], name), shell=True) ANS.extend(ReadEnergy("{}/".format(name))) print(ANS) print("Finished Optimization") print("Running Ialign..")
# Echo the selected input.
print()
print("PDB file:", args.pdb_file.name)
print("Selected Residue 1 Chain: {}, Residue number: {}".format(
    chain_id1, res_num1))
print("Selected Residue 2 Chain: {}, Residue number: {}".format(
    chain_id2, res_num2))

# Check whether the input is complete.
# sys.exit() with a string prints it on stderr and exits.
if not chain_id1 or not res_num1:
    sys.exit("ERROR: unknown either chain id or residue 1 number")
if not chain_id2 or not res_num2:
    sys.exit("ERROR: unknown either chain id or residue 2 number")

parser = PDBParser(PERMISSIVE=1)
print()
print('Parsing', args.pdb_file.name)
# load the structure from the open PDB file handle
st = parser.get_structure('STR', args.pdb_file)

# Checking residues exist and are different (only model 0 is inspected)
if chain_id1 not in st[0] or res_num1 not in st[0][chain_id1]:
    sys.exit("ERROR: non existing chain or residue")
if chain_id2 not in st[0] or res_num2 not in st[0][chain_id2]:
    sys.exit("ERROR: non existing chain or residue")
if (chain_id1 == chain_id2) and (res_num1 == res_num2):
    sys.exit("ERROR: identical residues")
print()
def get_pdb(pdb_code):
    """Parse a PDB file and return the resulting structure.

    ``pdb_code`` is used both as the structure id and as the file argument
    handed to ``PDBParser.get_structure``, so it must be something that
    method accepts as a file (e.g. a path).

    :param pdb_code: id and file location of the structure.
    :return: the structure object built by ``PDBParser``.
    """
    # The former bare ``structure.header`` expression had no effect and was
    # dropped.
    return PDBParser().get_structure(pdb_code, pdb_code)
def make_structure_for_pdbfile(file, structure_id):
    """Return the structure parsed from ``file`` under the id ``structure_id``.

    The parser is created with ``PERMISSIVE=1``. The whole structure is
    returned, not just its first model.
    """
    return PDBParser(PERMISSIVE=1).get_structure(structure_id, file)
# VARIOUS CLASSIFIERS FOR THE PDB FILES
import gzip
import sys
from Bio.PDB.PDBParser import PDBParser

parser = PDBParser(PERMISSIVE=0, QUIET=True)
pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"

# LOADING THE FILE WHICH WAS USED TO DEFINE THE MUTANTS
with open("pdb_seqres.txt", "r") as f:
    ft = f.readlines()

# Step through the file two lines at a time while the second field of the
# line, with its first four characters removed, reads "protein". proindex is
# the value of k once the loop stops.
k = 0
gt = ft[k].split()
R = gt[1][4:]
while R == "protein":
    gt = ft[k].split()
    R = gt[1][4:]
    k += 2
proindex = k

# OUTPUT
g = open("PDB_classifier.txt", "w")
h = open("struct_not_found.txt", "w")
import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import matplotlib.cm as cm import matplotlib.colors as col from scipy.optimize import curve_fit import pandas as pd from math import factorial import random import time import Bio import pickle from Bio.PDB.PDBParser import PDBParser parser = PDBParser(PERMISSIVE=1) def atom_id(atom): n_atomtypes = 4 id_mat = np.zeros([1,n_atomtypes])[0] # Atom type 1: Carbon if atom.get_name()[0]=='C': id_mat[0] = 1 # Atom type 2: Nitrogen if atom.get_name()[0]=='N': id_mat[1] = 1 # Atom type 3: Oxygen if atom.get_name()[0]=='O': id_mat[2] = 1
"--output_path", default='/data/databases/pdb/processed/domain_analisis') args = parser.parse_args() domains = defaultdict(lambda: []) for seq in bpio.parse(args.data_path + "/processed/domains.fasta", "fasta"): domains["_".join(seq.id.split("_")[0:2])].append(seq.id.split("_")) for (code, pdb_path) in tqdm(PDBs(pdb_dir=args.data_path)): pdb_model = PDB(code=code) pdb_model.save() p = PDBParser(PERMISSIVE=True, QUIET=True) try: for chain in p.get_structure(code, pdb_path).get_chains(): chains_dir = args.output_path + "/chains/" + code[1:3] + "/" mkdir(chains_dir) cs = ChainSplitter(chains_dir) process_chain(pdb_path, code, chain.id, pdb_model) for (_, _, res_start, res_end, dn, dn_start, dn_end) in domains[code + "_" + chain.id]: # 1r9d_A_2_787_PF02901.14_8_648 try: domains_dir = args.output_path + "/domains/" + code[ 1:3] + "/" dn_start = int(dn_start) dn_end = int(dn_end)
#Note: If top500H folder isn't in same directory, then the randint-funciton # end-value ,len(list), will be equal to 0, and so this code will not run due to # 'empty range for randrange() (0,0, 0) error #Borrowed from teacher's solution (distance-histograms assignment) and modified a bit. if __name__=="__main__": '''Iterate through and parse all files in a folder ''' import glob #Filename pattern matching # Create a list of protein structures structure_list = [] for index, fname in enumerate(glob.glob("top500H/*")): print(f"Parsing {fname}... ") p=PDBParser(QUIET=True) #Silences warnings try: #Extract structure and append to list s=p.get_structure("", fname) structure_list.append(s) except: #Skips unparsable files and print error code print(f"- ERROR in {fname}, therefor it has been skipped.") def protein_aalist(s, aa): '''Goes through one protein and createst a list of amino acids from it ''' list_of_aa = [] for res in s[0].get_residues(): if is_aa(res): #Tests object identity
def ParsePDB(pdbpth, mutant_tag, accept_atom=('CA', ), center='CA'):
    """Tabulate the accepted atoms of a PDB model around a mutated residue.

    NOTE(review): the original indentation of this function was lost; the
    layout below is the conventional reading and should be confirmed,
    especially the scope of the final ``break``.

    :param pdbpth: path of the PDB file. When its last '/'-separated
        component is 'model1.pdb', the position is remapped through a
        per-structure mapping CSV and chain 'A' is used.
    :param mutant_tag: dot-separated string with six fields:
        ['key', 'PDB', 'WILD_TYPE', 'CHAIN', 'POSITION', 'MUTANT'].
        POSITION is either all digits or digits followed by one
        insertion-code character.
    :param accept_atom: atom names (as returned by ``atom.get_name()``)
        that are recorded; all other atoms are skipped.
    :param center: only 'CA' is handled: the CA atom of the mutated
        residue is used as the reference point for distances.
    :return: tuple ``(df_pdb, center_coord)``; ``df_pdb`` has one row per
        recorded atom with the columns chain, res, het, posid, inode,
        full_name, atom_name, dist, x, y, z, occupancy, b_factor.
    :raises AssertionError: when the residue found at the mutated position
        does not match ``aa_123dict`` for the expected amino-acid code.
    """
    import warnings
    from Bio import BiopythonWarning
    from Bio.PDB.PDBParser import PDBParser
    # Silence Biopython warnings for the rest of the process.
    warnings.simplefilter('ignore', BiopythonWarning)
    # Empty frame that fixes the column names/order of the result.
    df_pdb = pd.DataFrame({
        'chain': [],
        'res': [],
        'het': [],
        'posid': [],
        'inode': [],
        'full_name': [],
        'atom_name': [],
        'dist': [],
        'x': [],
        'y': [],
        'z': [],
        'occupancy': [],
        'b_factor': []
    })
    key, pdbid, wtaa, mtchain, pos, mtaa = mutant_tag.split('.')
    print('The pdbid is:', pdbid, 'pth: %s' % pdbpth)
    # --------------------------------------------------------------------------------------------------------------
    # consider mapping: for 'model1.pdb' files the position from the tag is
    # translated to the numbering stored in the mapping CSV.
    if pdbpth.split('/')[-1] == 'model1.pdb':
        map_pos_pth = '/public/home/sry/mCNN/dataset/TR/map_pos/%s_mapping.csv' % pdbid
        df_map = pd.read_csv(map_pos_pth)
        df_map[['POSITION_OLD']] = df_map[['POSITION_OLD']].astype(str)
        df_map[['POSITION_NEW']] = df_map[['POSITION_NEW']].astype(str)
        # CSV columns: CHAIN,POSITION_OLD,POSITION_NEW
        # NOTE(review): .values[0] raises IndexError when no row matches.
        pos = df_map.loc[
            (df_map.CHAIN == mtchain) & (df_map.POSITION_OLD == pos),
            'POSITION_NEW'].values[0]
    # --------------------------------------------------------------------------------------------------------------
    # Split the position into residue number and insertion code.
    if pos.isdigit():
        INODE = ' '
        POSID = int(pos)
    else:
        INODE = pos[-1]
        POSID = int(pos[:-1])
    # Residue id tuple (hetero flag, number, insertion code) used to index
    # the chain.
    MT_pos = (' ', POSID, INODE)
    parser = PDBParser(PERMISSIVE=1)
    structure = parser.get_structure(pdbid, pdbpth)
    model = structure[0]
    # Sanity check: the residue at the mutated position must be the expected
    # amino acid. NOTE(review): these asserts are stripped under ``python -O``
    # and the bare ``except`` also hides lookup errors from the first check.
    if pdbpth.split('/')[-1] == 'model1.pdb':
        try:
            # TR_wild: model built from the wild-type sequence
            assert model['A'][MT_pos].get_resname() == aa_123dict[
                wtaa]
        except:
            # TR_mut: model built from the mutant sequence
            assert model['A'][MT_pos].get_resname() == aa_123dict[
                mtaa]
    else:
        assert model[mtchain][MT_pos].get_resname() == aa_123dict[wtaa]
    # NOTE(review): center_coord is only assigned when center == 'CA'; any
    # other value leaves it unbound and the code below fails on first use.
    if center == 'CA':
        if pdbpth.split('/')[-1] == 'model1.pdb':
            center_coord = model['A'][MT_pos]['CA'].get_coord()
        else:
            center_coord = model[mtchain][MT_pos]['CA'].get_coord()
    for chain in model:
        chain_name = chain.get_id()
        res_id_lst = [res.get_id() for res in chain]
        print('The res_number in chain %s is: %d' %
              (chain_name, len(res_id_lst)))
        res_list = [chain[res_id] for res_id in res_id_lst]
        for res in res_list:
            res_name = res.get_resname()
            het, pos_id, inode = res.get_id()
            for atom in res:
                full_name, coord, occupancy, b_factor = atom.get_name(
                ), atom.get_coord(), atom.get_occupancy(), atom.get_bfactor()
                if not full_name in accept_atom:
                    continue
                # First character of the atom name, stored as 'atom_name'.
                name = full_name.strip()[0]
                # if name in ('0','1','2','3','4','5','6','7','8','9','H','D'):
                # if not name in ('C','O','N','S'):
                # Euclidean distance from the reference atom.
                dist = np.linalg.norm(center_coord - coord)
                x, y, z = coord
                # One-row array in column order. Presumably the mixed
                # str/number values are all stored as strings here, which
                # would explain the float cast of 'dist' below - confirm.
                temp_array = np.array([
                    chain_name, res_name, het, pos_id, inode, full_name, name,
                    dist, x, y, z, occupancy, b_factor
                ]).reshape(1, -1)
                temp_df = pd.DataFrame(temp_array)
                temp_df.columns = df_pdb.columns
                # NOTE(review): concatenating row by row copies the whole
                # frame on every accepted atom.
                df_pdb = pd.concat([df_pdb, temp_df],
                                   axis=0,
                                   ignore_index=True)
                # NOTE(review): the scope of this break could not be
                # recovered from the collapsed source. It is assumed to stop
                # the atom scan after the first accepted atom of a residue -
                # confirm against the original file.
                break
    df_pdb[['dist']] = df_pdb[['dist']].astype(float)
    print('The atom_number (only CA) is:', len(df_pdb))
    return df_pdb, center_coord
"AE", "BE", "CE", "DE", "EE", "FE", "AF", "BF", "CF", "DF", "EF", "FF" ] with open(outfile, "a") as f: f.write("id" + '\t') for i in alphabet: f.write(i + '\t') f.write('\n') with open(infile) as f1: pdblist = f1.read().splitlines() for pdbid in pdblist: pdbFile = pdbid + ".pdb" ## First, open and parse the protein file p = PDBParser(PERMISSIVE=1) structure = p.get_structure(pdbFile, pdbFile) print(pdbid) for model in structure: for chain in model: seq = list() chainID = chain.get_id() for residue in chain: if is_aa(residue.get_resname(), standard=True): seq.append(three_to_one(residue.get_resname())) else: seq.append("X") chainseq = str("".join(seq)) chainlength = len(chainseq)
def main():
    """Command-line entry point of the 'polarContacts' script.

    Steps, in order:
      1. Parse the command line and echo the settings.
      2. Load the VDW parameter set and the residue library.
      3. Parse the PDB file and keep its first model.
      4. Collect polar-atom pairs within HBLNK of each other, skipping
         same-residue pairs, pairs closer than COVLNK, sequence neighbours
         and (with --nowats) pairs that involve a water residue.
      5. Print the contact list, then the per-residue-pair energies
         ("Exercise A"), the main/side-chain classification with a plot
         ("Exercise B.1") and the energies restricted to a hard-coded list
         of surface residues ("Exercise B 2").

    Nothing is returned; all results are printed or plotted. The process
    exits with status 2 when the PDB path is empty or loading raises
    OSError.

    NOTE(review): the original indentation of this function was lost; the
    layout below is the conventional reading (in particular both
    ``l.append`` calls are placed once per residue pair) and should be
    confirmed.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')
    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')
    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')
    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')
    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')
    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')
    parser.add_argument('pdb_path')
    args = parser.parse_args()
    # Echo every parsed option.
    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)
    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    diel = args.diel
    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))
    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))
    # NOTE(review): this check runs only after both libraries were loaded,
    # and pdb_path is a required positional argument.
    if not pdb_path:
        parser.print_help()
        sys.exit(2)
    # NOTE(review): from here on ``parser`` is the PDB parser, no longer the
    # argparse parser.
    parser = PDBParser(PERMISSIVE=1)
    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)
    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")
    # Model 0 is used in any case
    st = st[0]
    # Making a list of polar atoms
    polats = []
    if backonly:
        selected_atoms = backbone_polars
    else:
        selected_atoms = all_polars
    for at in st.get_atoms():
        if at.id in selected_atoms:
            polats.append(at)
    # Searching for contacts within HBLNK between different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        # Discard covalent pairs and sequence neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
                or at2.get_parent().get_resname() in waternames:
                continue
        # atom1 = Atom(at1,1,aaLib,vdwParams)
        # atom2 = Atom(at2,1,aaLib,vdwParams)
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])
    print()
    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))
    # ``hb[0] - hb[1]`` is the value printed in the 'Dist (A)' column.
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id, hb[0] - hb[1]))
    print()
    print("Residue interactions")
    # Making a list of residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])
    print('Exercise A')
    # For every residue pair, sum over all atom pairs a charge-charge term
    # (332.16 * q1 * q2 / diel / distance) and a 12-6 term built from the
    # geometric means of the two atoms' eps and sig parameters.
    l = []
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        eint = 0.
        evdw = 0.
        for at1 in rpair[0].get_atoms():
            resid1 = rpair[0].get_resname()
            atid1 = at1.id
            atparam1 = aaLib.getParams(resid1, atid1)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]
            for at2 in rpair[1].get_atoms():
                resid2 = rpair[1].get_resname()
                atid2 = at2.id
                atparam2 = aaLib.getParams(resid2, atid2)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / (
                    at1 - at2)
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                evdw = evdw + 4 * eps * ((sig / (at1 - at2))**12 -
                                         (sig / (at1 - at2))**6)
        #print (resid1,rpair[0].id[1],resid2,rpair[1].id[1],eint,evdw, eint+evdw)
        l.append([
            resid1, rpair[0].id[1], resid2, rpair[1].id[1], eint, evdw,
            eint + evdw
        ])
    # Print the five entries with the lowest total (index 6).
    for index, element in enumerate(sorted(l, key=lambda i: i[6])):
        if index < 5:
            print(element)
    #Exercise B 1
    print('Exercise B.1')
    # Contacts are grouped by whether each of the two atoms is listed in
    # backbone_polars ('main') or not ('side'). to_main / to_side collect
    # unique "RESNAME number" labels, filled per label as coded below.
    mainmain = []
    mainside = []
    sidemain = []
    sideside = []
    to_main = []
    to_side = []
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        resid1 = hb[0].get_parent()
        resid2 = hb[1].get_parent()
        if hb[0].id in backbone_polars:
            a = 'main'
        else:
            a = 'side'
        if hb[1].id in backbone_polars:
            b = 'main'
        else:
            b = 'side'
        label = a + '-' + b
        if label == 'main-main':
            mainmain.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            if (str(resid1.get_resname()) + ' ' +
                    str(resid1.id[1])) not in to_main:
                to_main.append(
                    str(resid1.get_resname()) + ' ' + str(resid1.id[1]))
            if (str(resid2.get_resname()) + ' ' +
                    str(resid2.id[1])) not in to_main:
                to_main.append(
                    str(resid2.get_resname()) + ' ' + str(resid2.id[1]))
        elif label == 'main-side':
            mainside.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            # Here residue 2 goes to to_main and residue 1 to to_side.
            if (str(resid2.get_resname()) + ' ' +
                    str(resid2.id[1])) not in to_main:
                to_main.append(
                    str(resid2.get_resname()) + ' ' + str(resid2.id[1]))
            if (str(resid1.get_resname()) + ' ' +
                    str(resid1.id[1])) not in to_side:
                to_side.append(
                    str(resid1.get_resname()) + ' ' + str(resid1.id[1]))
        elif label == 'side-main':
            sidemain.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            # Here residue 2 goes to to_side and residue 1 to to_main.
            if (str(resid2.get_resname()) + ' ' +
                    str(resid2.id[1])) not in to_side:
                to_side.append(
                    str(resid2.get_resname()) + ' ' + str(resid2.id[1]))
            if (str(resid1.get_resname()) + ' ' +
                    str(resid1.id[1])) not in to_main:
                to_main.append(
                    str(resid1.get_resname()) + ' ' + str(resid1.id[1]))
        else:
            sideside.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            if (str(resid1.get_resname()) + ' ' +
                    str(resid1.id[1])) not in to_side:
                to_side.append(
                    str(resid1.get_resname()) + ' ' + str(resid1.id[1]))
            if (str(resid2.get_resname()) + ' ' +
                    str(resid2.id[1])) not in to_side:
                to_side.append(
                    str(resid2.get_resname()) + ' ' + str(resid2.id[1]))
    for i in mainmain:
        print(i)
    for i in mainside:
        print(i)
    for i in sidemain:
        print(i)
    for i in sideside:
        print(i)
    # Build the plot data: one x position per label, first all to_main
    # entries and then all to_side entries; y is the category name.
    nmain = []
    nummain = []
    nside = []
    numside = []
    for i in range(len(to_main)):
        nmain.append('to_main')
        nummain.append(i)
    for i in range(len(to_side)):
        nside.append('to_side')
        numside.append(i + len(to_main))
    x = np.array(nummain + numside)
    y = np.array(nmain + nside)
    res = to_main + to_side
    plt.xticks(x, res)
    plt.plot(x, y, 'ro')
    plt.show()
    # The plot shows, for each residue label, whether it was collected in
    # the to_main or in the to_side list.
    #End of exercise B 1
    print()
    print('Exercise B', 2)
    ## Residues on the surface, obtained from
    ## http://cib.cf.ocha.ac.jp/bitool/ASA/
    surface = [['ILE', 3], ['VAL', 5], ['ILE', 23], ['VAL', 26], ['ILE', 30],
               ['GLN', 41], ['LEU', 43], ['LEU', 56], ['ILE', 61], ['LEU', 67],
               ['LEU', 69]]
    # Same pairwise charge sum as in Exercise A, but only accumulated when
    # both residues of the pair appear in ``surface``.
    # NOTE(review): the prefactor here is 80 rather than 332.16, and the
    # surface membership test is repeated for every atom pair - confirm
    # that both are intended.
    l = []
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        eint = 0.
        for at1 in rpair[0].get_atoms():
            resid1 = rpair[0].get_resname()
            resid1id = rpair[0].id[1]
            atid1 = at1.id
            atparam1 = aaLib.getParams(resid1, atid1)
            for at2 in rpair[1].get_atoms():
                resid2 = rpair[1].get_resname()
                resid2id = rpair[1].id[1]
                atid2 = at2.id
                atparam2 = aaLib.getParams(resid2, atid2)
                for i in surface:
                    for j in surface:
                        if resid1 == i[0] and resid1id == i[1] and resid2 == j[
                                0] and resid2id == j[1]:
                            eint = eint + 80 * atparam1.charg * atparam2.charg / diel / (
                                at1 - at2)
        # Only pairs with a non-zero sum are reported.
        if eint != 0:
            l.append([resid1, resid1id, resid2, resid2id, eint])
    for i in l:
        print(i)
args.input except AttributeError: from modeller import environ import modeller_caller as mc env = environ() # Some variables needed for the modeller modeler = mc.modeller_caller(env) # Convert the fasta alignment in pir format if not args.fasta and not args.pir: raise argparser.error("Required a fasta or a pir alignment") elif args.fasta: modeler.convert_ali(args.fasta, args.pir) modeler.modelize(args.pir, args.seq, args.models) else: # Retrieve the PDB structure, filter and get sequence logging.captureWarnings(True) parser = PDBParser(PERMISSIVE=1) if os.path.isfile(args.input): pdbpath = args.input else: pdbl = PDBList() try: pdbpath = plots.pdb_download(args.input, os.getcwd()) except: raise FileExistsError("Make sure your query format is correct") structure = parser.get_structure("cozmic_pdb_query", pdbpath) residues = cm.filter_residues(structure) s = "" for residue in residues: s += SCOPData.protein_letters_3to1.get(residue.get_resname(), 'X') seq = Seq(s, generic_protein) # sys.stderr.write("Protein sequence:%s\n" % seq)
def parse_atoms_infile(filename):
    """Parse a PDB file and return the atoms named 'CB'.

    The file must be in PDB format (*.ent or *.pdb). It is read with a
    quiet ``PDBParser`` under the structure id "X", and every atom of the
    resulting structure whose ``name`` equals 'CB' is collected in
    iteration order.

    :param filename: path of the PDB file to read.
    :return: list of the matching atom objects (empty if none match).
    """
    structure = PDBParser(QUIET=True).get_structure("X", filename)
    cb_atoms = []
    for candidate in structure.get_atoms():
        if candidate.name == 'CB':
            cb_atoms.append(candidate)
    return cb_atoms