def extract_chain(structure: Structure, chain_ids: list, output_path: str):
    """Write ``structure`` or a subset of its chains to ``<output_path>/preped.pdb``.

    The structure is also stored on ``ChainExtractor.structure``. When
    ``chain_ids`` is empty the complete structure is saved; otherwise only
    the first model, restricted to the chains named in ``chain_ids``, is
    written.
    """
    destination = output_path + "/preped.pdb"
    ChainExtractor.structure = structure

    # Nothing to filter: dump everything.
    if len(chain_ids) == 0:
        writer = PDBIO()
        writer.set_structure(structure)
        writer.save(destination)
        return

    class ChainSelect(Select):
        """Accept the first model and, inside it, the requested chains."""

        def accept_model(self, model):
            return 1 if model == ChainExtractor.structure[0] else 0

        def accept_chain(self, chain):
            first_model = ChainExtractor.structure[0]
            wanted = [first_model[chain_id] for chain_id in chain_ids]
            return 1 if chain in wanted else 0

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(destination, ChainSelect())
def test_copy_and_write_disordered(self):
    """Write chain A to a temp file, re-parse it and compare atom names."""
    original = self.structure
    pdb_writer = PDBIO()
    # Only chain "A" of the first model is handed to the writer.
    pdb_writer.set_structure(original[0]["A"])

    handle, tmp_path = tempfile.mkstemp()
    os.close(handle)  # the writer reopens the file by name
    try:
        pdb_writer.save(tmp_path)
        reparsed = self.parser.get_structure("x_copy", tmp_path)

        atoms_before = self.unpack_all_atoms(original)
        atoms_after = self.unpack_all_atoms(reparsed)
        self.assertEqual(len(atoms_before), len(atoms_after))
        for atom_before, atom_after in zip(atoms_before, atoms_after):
            self.assertEqual(atom_before.name, atom_after.name)
    finally:
        os.remove(tmp_path)
def align_structures_biopython(struct_path_ref, struct_path_query, new_query_path):
    """Superimpose the query structure onto the reference and save it.

    Both files are parsed with ``PDBParser``; their sequences are aligned via
    ``get_pairwise_alignment`` and the CA atoms of every aligned residue pair
    are used to fit the query onto the reference. The transformed query is
    written to ``new_query_path``.

    Raises:
        ValueError: if either structure has more than one model or more than
            one chain in its first model.
    """

    def sequence_of(pdb):
        # One-letter sequence of all residues known to ``to_one_letter_code``.
        if len(pdb) > 1:
            raise ValueError(
                "Can not handle structures with more than one MODEL!\nThis structure has {0} MODELS!"
                .format(len(pdb)))
        if len(pdb[0]) > 1:
            raise ValueError(
                "Can not handle structures with more than one CHAIN!\nThis structure has {0} CHAINS!"
                .format(len(pdb[0])))
        return "".join(
            "{0}".format(to_one_letter_code[res.resname])
            for model in pdb
            for chain in model
            for res in chain
            if res.resname in to_one_letter_code
        )

    def build_alignment(ref_structure, query_structure):
        # Reference sequence is extracted first, then the query sequence.
        ref_sequence = sequence_of(ref_structure)
        query_sequence = sequence_of(query_structure)
        pairwise = get_pairwise_alignment(ref_sequence, query_sequence)
        records = [
            SeqRecord(Seq(pairwise["ref_seq"], generic_protein), id="ref"),
            SeqRecord(Seq(pairwise["query_seq"], generic_protein), id="query"),
        ]
        return MultipleSeqAlignment(records)

    parser = PDBParser()
    pdb_ref = parser.get_structure("ref", struct_path_ref)
    pdb_query = parser.get_structure("query", struct_path_query)
    residue_pairs = StructureAlignment(
        build_alignment(pdb_ref, pdb_query), pdb_ref, pdb_query)

    ref_atoms = []
    query_atoms = []
    for res_ref, res_query in residue_pairs.get_iterator():
        # Gapped positions have no partner residue and are skipped.
        if res_ref is None or res_query is None:
            continue
        ref_atoms.append(res_ref['CA'])
        query_atoms.append(res_query['CA'])

    fitter = Superimposer()
    fitter.set_atoms(np.array(ref_atoms), np.array(query_atoms))
    fitter.apply(pdb_query.get_atoms())

    writer = PDBIO()
    writer.set_structure(pdb_query)
    writer.save(new_query_path)
def test_pdbio_select(self):
    """Write only alpha carbons through a Select subclass and count residues."""

    class CAonly(Select):
        """Accept atoms named CA whose element is carbon."""

        def accept_atom(self, atom):
            is_alpha_carbon = atom.name == "CA" and atom.element == "C"
            if is_alpha_carbon:
                return 1
            # Falls through (returns None) for every other atom.

    pdb_writer = PDBIO()
    pdb_writer.set_structure(self.structure)

    handle, tmp_path = tempfile.mkstemp()
    os.close(handle)
    try:
        pdb_writer.save(tmp_path, CAonly())
        reparsed = self.parser.get_structure("1a8o", tmp_path)
        residue_count = len(list(reparsed.get_residues()))
        self.assertEqual(residue_count, 70)
    finally:
        os.remove(tmp_path)
def get(self, request, *args, **kwargs):
    """Serve the cleaned structures, or a selected substructure, as a zip.

    ``self.kwargs['substructure']`` selects the behaviour:

    * ``'select'`` - redirect to ``/structure/pdb_segment_selection``.
    * ``'full'`` - return the archive held in
      ``request.session['cleaned_structures']`` as is.
    * ``'custom'`` - rewrite every entry of that archive through
      ``SubstructureSelector`` and return the filtered archive; the
      ``substructure_mapping`` session key is removed afterwards.

    Returns the zip as an ``HttpResponse`` attachment, or ``None`` when the
    archive is empty.
    """
    mode = self.kwargs['substructure']
    if mode == 'select':
        return HttpResponseRedirect('/structure/pdb_segment_selection')

    # NOTE(review): any other value of ``mode`` leaves ``out_stream`` unbound,
    # exactly as in the original code.
    if mode == 'full':
        out_stream = request.session['cleaned_structures']
    elif mode == 'custom':
        simple_selection = request.session.get('selection', False)
        selection = Selection()
        if simple_selection:
            selection.importer(simple_selection)

        io = PDBIO()
        out_stream = BytesIO()
        # Context managers close both archives even if one entry fails to
        # parse or to be written.
        with zipfile.ZipFile(request.session['cleaned_structures'], 'r') as zipf_in, \
                zipfile.ZipFile(out_stream, 'w', zipfile.ZIP_DEFLATED) as zipf_out:
            for name in zipf_in.namelist():
                pdb_text = StringIO(zipf_in.read(name).decode('utf-8'))
                first_model = PDBParser(QUIET=True).get_structure(name, pdb_text)[0]
                io.set_structure(first_model)
                tmp = StringIO()
                io.save(tmp, SubstructureSelector(request.session['substructure_mapping'],
                                                  parsed_selection=SelectionParser(selection)))
                zipf_out.writestr(name, tmp.getvalue())
        # NOTE(review): the collapsed source does not show whether this
        # statement belonged to the 'custom' branch; it is kept here because
        # the mapping is only used in this branch - confirm.
        del request.session['substructure_mapping']

    payload = out_stream.getvalue()
    if len(payload) > 0:
        response = HttpResponse(content_type="application/zip")
        response['Content-Disposition'] = 'attachment; filename="pdb_structures.zip"'
        response.write(payload)
        return response
def save_chain_pdb(target, fname, pdb_fname, ind, skip_chain=False, old=True):
    """Extract a chain from ``pdb_fname``, filter residues and renumber it.

    Args:
        target: identifier whose first four characters name the structure,
            whose fifth character (``target[4]``) is the chain to keep, and
            whose last character is used to rename a single blank chain.
        fname: output path that receives the ``pdb_reres`` result.
        pdb_fname: input PDB file.
        ind: value passed to ``pdb_reres`` as the ``-<ind>`` option.
        skip_chain: when true, every chain is accepted.
        old: when true, all non-water residues are kept; otherwise only
            residues whose hetero flag is blank or ``H_MSE``.

    The intermediate file ``<cwd>/<target>_tmp.pdb`` is always removed.
    """
    # Local import: quoting keeps the shell command valid when the working
    # directory or the output path contains spaces or shell metacharacters.
    import shlex

    pdb_parser = PDBParser(PERMISSIVE=1)
    s = pdb_parser.get_structure(target[0:4], pdb_fname)
    chains = s[0].child_list
    # A lone chain with a blank id is renamed after the target's last character.
    if len(chains) == 1 and chains[0].get_id() == ' ':
        chains[0].id = target[-1]

    class ChainSelect(Select):
        """Keep the requested chain and filter residues by hetero flag."""

        def __init__(self, chain, skip_chain, old=True):
            self._chain = chain
            self._skip_chain = skip_chain
            self._old = old

        def accept_chain(self, chain):
            return 1 if (chain.id == self._chain or self._skip_chain) else 0

        def accept_residue(self, residue):
            hetero_flag = residue.full_id[3][0]
            if hetero_flag == ' ' or hetero_flag == 'H_MSE':
                return 1
            if self._old and hetero_flag != 'W':
                return 1
            return 0

    io = PDBIO()
    io.set_structure(s)
    tmp_file = os.path.join(os.getcwd(), f'{target}_tmp.pdb')
    try:
        io.save(tmp_file, ChainSelect(target[4], skip_chain=skip_chain, old=old))
        reres_cmd = f'pdb_reres -{ind} {shlex.quote(tmp_file)} > {shlex.quote(fname)}'
        subprocess.call(reres_cmd, shell=True)
    finally:
        # Clean up even when writing or renumbering fails.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
def write_structure(structure, nfileout, chainname):
    """Write a cleaned copy of ``structure`` to the PDB file ``nfileout``.

    Atoms are kept when they belong to chain ``chainname`` (or to any chain
    if ``chainname`` is empty), are not part of a water residue, and are
    either not disordered or carry altloc ``'A'``. Every accepted atom has
    its altloc reset to a blank and receives a consecutive serial number
    starting at 1; these changes are applied to the atoms of ``structure``
    itself.
    """
    io = PDBIO()
    io.set_structure(structure)

    class AtomSelect(Select):
        """Select atoms by chain, drop waters and alternate locations."""

        def __init__(self):
            # The counter lives on the selector so it survives across calls;
            # a local variable would restart at 1 for every atom.
            self._next_serial = 1

        def accept_atom(self, atom):
            residue = atom.get_parent()
            if chainname and residue.get_parent().get_id() != chainname:
                return 0
            if residue.get_id()[0] == 'W':
                return 0
            if atom.is_disordered() and atom.get_altloc() != 'A':
                return 0
            atom.set_altloc(' ')
            atom.set_serial_number(self._next_serial)
            self._next_serial += 1
            return 1

    io.save(nfileout, AtomSelect())
def save_pdb(structure, filename, selector=None):
    """Write ``structure`` to ``filename``, filtered by ``selector`` if given."""
    writer = PDBIO()
    writer.set_structure(structure)
    # A falsy selector means "use the writer's default selection".
    save_args = (filename, selector) if selector else (filename,)
    writer.save(*save_args)
def selectChain(ifn, ofn, chainID='A'):
    """Copy the PDB file ``ifn`` to ``ofn`` keeping only chain ``chainID``."""
    structure = PDBParser().get_structure('x', ifn)

    class ChainSelector():
        """Plain selector object: accepts everything except other chains."""

        def __init__(self, chainID=chainID):
            self.chainID = chainID

        def accept_model(self, model):
            return 1

        def accept_chain(self, chain):
            return 1 if chain.get_id() == self.chainID else 0

        def accept_residue(self, residue):
            return 1

        def accept_atom(self, atom):
            return 1

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(ofn, ChainSelector(chainID))
def deleteChain():
    """Interactively delete whole chains from ``pdb_name`` and save the result.

    The user is asked how many chains to delete and then for each chain id.
    The edited structure is written either to ``<sequence>_bound.pdb`` (the
    lower-cased peptide sequence of what remains) or to
    ``<name>_without<last deleted chain>.pdb``.

    Uses ``raw_input``, so this function targets the Python 2 builtins.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    # Stays empty when the user asks to delete zero chains, so the output
    # file name can still be built.
    rm_chain = ''
    nb_chain = int(input('How many chain do you want to delete : '))
    for _ in range(nb_chain):
        rm_chain = raw_input('What chain you want to delete : ')
        for model in structure:
            # Iterate over a snapshot: detaching from the live child list
            # while iterating it would skip the chain that follows.
            for chain in list(model):
                if chain.id == rm_chain:
                    model.detach_child(chain.id)

    pept = raw_input('Do you want to get a pdb with the sequence in its name : ')
    w = PDBIO()
    w.set_structure(structure)
    if pept == 'y':
        seq = ''
        for pp in PPBuilder().build_peptides(structure):
            seq = seq + pp.get_sequence()
        seq = str(seq.lower())
        w.save(seq + '_bound.pdb')
    else:
        w.save(nameStruct + '_without' + rm_chain + '.pdb')
def RemoveLigandsOneBioUnit(biounit, ligandlist):
    """Strip listed ligands and all waters from a biounit file and save it.

    Args:
        biounit: path of the structure file; its last ``/``-separated
            component is used as the structure id.
        ligandlist: iterable of mappings with the keys ``"ChainID"``,
            ``"LigName"`` and ``"ResNum"``.

    For every ligand entry each residue is visited once: the residue with
    the matching chain id and number is detached, water residues are
    detached, and for multi-word ligand names ``LongLigand`` is called on
    residues numbered at or after ``ResNum``. The result is written to
    ``BIOSTRDIR/<structure id>``.

    Returns:
        None. An unreadable input file is skipped silently.
    """
    p = PDBParser(PERMISSIVE=1)
    pdbname = biounit.split("/")[-1]
    try:
        models = p.get_structure(pdbname, biounit)
    except Exception:
        # Best effort: skip files that cannot be parsed, but let
        # KeyboardInterrupt / SystemExit propagate.
        return None

    for rligand in ligandlist:
        for model in models:
            for chain in model:
                # list(chain) snapshots the residues so detaching is safe.
                for residue in list(chain):
                    if chain.id == rligand["ChainID"] and int(rligand["ResNum"]) == residue.id[1]:
                        chain.detach_child(residue.id)
                    elif residue.id[0] == "W":
                        chain.detach_child(residue.id)
                    elif len(rligand["LigName"].split()) > 1 and int(rligand["ResNum"]) <= residue.id[1]:
                        LongLigand(chain, residue, rligand)

    io = PDBIO()
    io.set_structure(models)
    filepath = os.path.join(BIOSTRDIR, models.id)
    io.save(filepath)
def rotateTranslatePdb(fname, rotX, rotY, rotZ, transX, transY, transZ, fnameOut=None):
    """Rotate and translate the structure in ``fname``; optionally save it.

    The rotation is the product of axis rotations about X, Y and Z (the X
    and Z angles are negated before use); the translation is applied in the
    same ``transform`` call. When ``fnameOut`` is given the transformed
    structure is also written there. Returns the transformed structure.
    """
    print(fname, rotX, rotY, rotZ, transX, transY, transZ, fnameOut)
    struct = myPDBParser(QUIET=True).get_structure(fname)

    about_x = rotaxis(-rotX, Vector(1, 0, 0))
    about_y = rotaxis(rotY, Vector(0, 1, 0))
    about_z = rotaxis(-rotZ, Vector(0, 0, 1))
    shift = np.array((transX, transY, transZ), 'f')
    struct.transform(about_x.dot(about_y).dot(about_z), shift)

    if fnameOut is None:
        return struct

    writer = PDBIO()
    writer.set_structure(struct)
    writer.save(fnameOut)
    return struct
def aa_pdb(pocket, protein_name):
    """Save the residues listed in a pocket file as their own PDB file.

    The sixth whitespace-separated field of every ``ATOM`` line in
    ``pocket`` is collected; residues of ``<protein_name>.pdb`` whose number
    (as text) is among those values are written to a file in
    ``pdb_pockets``. The working directory is changed along the way.
    """
    print('started')
    os.chdir(path / protein_name / 'pockets')
    with open(pocket) as pocket_file:
        wanted_numbers = {
            line.split()[5] for line in pocket_file if line.startswith('ATOM')
        }

    os.chdir(path / protein_name)
    structure = PDBParser().get_structure(protein_name, protein_name + '.pdb')

    class Pocket(Select):
        """Accept residues whose sequence number was seen in the pocket file."""

        def accept_residue(self, residue):
            return 1 if str(residue.get_id()[1]) in wanted_numbers else 0

    writer = PDBIO()
    writer.set_structure(structure)
    pocket_name = protein_name.replace('out', '') + pocket.replace(
        'pocket', '_').replace('_atm', '') + '.pdb'
    os.chdir(pdb_pockets)
    writer.save(pocket_name, Pocket())
def test_conversion(self):
    """Parse 1LCD.cif, write it as PDB, parse again and compare."""
    import os  # local import: needed to release the temp file

    cif_parser = MMCIFParser(QUIET=1)
    cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

    pdb_writer = PDBIO()
    pdb_writer.set_structure(cif_struct)
    filenumber, filename = tempfile.mkstemp()
    # mkstemp returns an open descriptor; close it and remove the file
    # afterwards so the test leaves nothing behind.
    os.close(filenumber)
    try:
        pdb_writer.save(filename)

        pdb_parser = PDBParser(QUIET=1)
        pdb_struct = pdb_parser.get_structure('example_pdb', filename)

        # comparisons
        self.assertEqual(len(pdb_struct), len(cif_struct))

        pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
        cif_atom_names = [a.name for a in cif_struct.get_atoms()]
        self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
        self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

        pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
        cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
        self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
    finally:
        os.remove(filename)
def filter_structure(pdb,chain,lig,lig_num):
    """Save the ligand and every residue within ``search_radius`` of it.

    ``<pdb_path>/<pdb>.pdb`` is parsed, all atom pairs closer than
    ``search_radius`` are enumerated, and both residues of each pair that
    involves the ligand (matched by chain id, residue name and residue
    number given as text) are written to
    ``<output_path>/<pdb>:<chain>:<lig>:<lig_num>.atoms.pdb``.
    """
    pdb_input = pdb_path + "/" + pdb + ".pdb"
    pdb_output = output_path + "/" + pdb + ":" + chain + ":" + lig + ":" + lig_num + ".atoms.pdb"

    structure = PDBParser(QUIET=1).get_structure(pdb, pdb_input)
    close_pairs = NeighborSearch(list(structure.get_atoms())).search_all(search_radius)

    def is_ligand(residue):
        return (residue.parent.id == chain
                and residue.resname == lig
                and str(residue.id[1]) == lig_num)

    res_list = set()
    for atom_pair in close_pairs:
        first_res = atom_pair[0].parent
        second_res = atom_pair[1].parent
        if is_ligand(first_res) or is_ligand(second_res):
            res_list.update((first_res, second_res))

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_output, ResSelect(res_list))
def prepare_virtual_sites(pdb_file, use_cis_proline=False):
    """Recompute N, C and H atom positions from CA/O atoms and rewrite the file.

    For every residue, the coordinates of its ``N`` and ``H`` atoms and of
    the previous residue's ``C`` atom (when those atoms exist) are replaced
    by fixed linear combinations of the previous residue's ``CA``, the
    current residue's ``CA`` and the previous residue's ``O``. A separate
    coefficient set is used for residues named ``IPR`` when
    ``use_cis_proline`` is true. The result overwrites ``pdb_file``.
    """
    parser = PDBParser(QUIET=True)
    structure=parser.get_structure('X',pdb_file,)
    for model in structure:
        for chain in model:
            # r_im: atoms of the previous residue, r_i: atoms of the current one.
            r_im={}
            r_i={}
            for residue in chain:
                r_im=r_i
                r_i={}
                for atom in residue:
                    r_i[atom.get_name()]=atom
                # NOTE(review): for the first residue of a chain r_im is empty,
                # so an 'N' or 'H' atom there would raise KeyError on r_im['CA'];
                # presumably the input never has these atoms on the first residue.
                if use_cis_proline and residue.get_resname() == "IPR":
                    if 'N' in r_i:
                        r_i['N'].set_coord(-0.2094*r_im['CA'].get_coord()+ 0.6908*r_i['CA'].get_coord() + 0.5190*r_im['O'].get_coord())
                    if 'C' in r_im:
                        r_im['C'].set_coord(0.2196*r_im['CA'].get_coord()+ 0.2300*r_i['CA'].get_coord() + 0.5507*r_im['O'].get_coord())
                    if 'H' in r_i:
                        r_i['H'].set_coord(-0.9871*r_im['CA'].get_coord()+ 0.9326*r_i['CA'].get_coord() + 1.0604*r_im['O'].get_coord())
                else:
                    if 'N' in r_i:
                        r_i['N'].set_coord(0.48318*r_im['CA'].get_coord()+ 0.70328*r_i['CA'].get_coord()- 0.18643 *r_im['O'].get_coord())
                    if 'C' in r_im:
                        r_im['C'].set_coord(0.44365*r_im['CA'].get_coord()+ 0.23520*r_i['CA'].get_coord()+ 0.32115 *r_im['O'].get_coord())
                    if 'H' in r_i:
                        r_i['H'].set_coord(0.84100*r_im['CA'].get_coord()+ 0.89296*r_i['CA'].get_coord()- 0.73389 *r_im['O'].get_coord())
    # Write the modified coordinates back over the input file.
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)
def save_pdb(struct, name):
    """Print a residue listing of ``struct`` and save it as a PDB file.

    The residue count and each residue's id and name are printed, then the
    structure is written to ``<name>volume_simulator.pdb``.
    """
    # Single-argument print calls produce the same text under Python 2 and
    # Python 3; the former print statements were Python-2-only syntax.
    print("test %s" % len(list(struct.get_residues())))
    for resi in struct.get_residues():
        print("%s %s" % (resi.id, resi.resname))
    out = PDBIO()
    out.set_structure(struct)
    out.save(str(name) + 'volume_simulator.pdb')
def vector2bfactor(vector, pdb_fh, pdb_clrd_fh):
    """
    Incorporates vector with values to the B-factor of PDB file.

    Every atom of a standard amino-acid residue numbered ``k`` receives
    ``vector[k - 1]`` as its B-factor. Residues whose number falls outside
    ``1..len(vector)`` are left untouched.

    :param vector: vector with values, indexed by residue number minus one
    :param pdb_fh: path of input PDB file
    :param pdb_clrd_fh: path of output PDB file
    """
    amino_acids = frozenset([
        'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
        'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'
    ])
    pdb_data = PDBParser().get_structure("pdb_name", pdb_fh)
    for model in pdb_data:
        for chain in model:
            for residue in chain:
                if residue.get_resname() not in amino_acids:  # only aas
                    continue
                resnum = residue.id[1]  # residue.id is 1 based count
                # The lower bound matters: numbers <= 0 would otherwise index
                # the vector from its end and assign an unrelated value.
                if not 1 <= resnum <= len(vector):
                    continue
                value = vector[resnum - 1]
                for atom in residue:
                    atom.set_bfactor(value)
    pdb_io = PDBIO()
    pdb_io.set_structure(pdb_data)
    pdb_io.save(pdb_clrd_fh)
def test_conversion(self):
    """Parse 1LCD.cif, write it as PDB, parse again and compare."""
    import os  # local import: needed to release the temp file

    cif_parser = MMCIFParser(QUIET=1)
    cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

    pdb_writer = PDBIO()
    pdb_writer.set_structure(cif_struct)
    filenumber, filename = tempfile.mkstemp()
    # mkstemp returns an open descriptor; close it and remove the file
    # afterwards so the test leaves nothing behind.
    os.close(filenumber)
    try:
        pdb_writer.save(filename)

        pdb_parser = PDBParser(QUIET=1)
        pdb_struct = pdb_parser.get_structure('example_pdb', filename)

        # comparisons
        self.assertEqual(len(pdb_struct), len(cif_struct))

        # The reference lists must come from the mmCIF structure; building
        # them from pdb_struct would compare the PDB output with itself.
        pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
        cif_atom_names = [a.name for a in cif_struct.get_atoms()]
        self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
        self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

        pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
        cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
        self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
    finally:
        os.remove(filename)
def IntrinsicExhangeRatesAndProtectionFactors(ProteinStructure, DistanceCutoff, Temperature, pH, ReferenceData, EnergyCutoff, PathToDSSP, Betac, Betah):
    """Run the exchange-rate and protection-factor pipeline on every model.

    For each model: hydrogens are removed, the model is dumped to
    ``Temp.pdb``, exchange rates are calculated, burial and hydrogen bonds
    are computed and combined into protection factors, which are then
    assigned back to the model. ``Temp.pdb`` is deleted after each model.
    """
    temp_pdb = "Temp.pdb"
    PrintBreak()

    for model in ProteinStructure:
        sys.stdout.write("Model " + str(model.get_id()) + " contains ")

        # Hydrogens are stripped before anything is written or computed.
        RemoveHydrogens(model)

        # The model is wrapped in a throw-away structure and written out so
        # that the hydrogen-bond step can read it from disk.
        scratch = Bio.PDB.Structure.Structure("Temporary")
        scratch.add(model)
        writer = PDBIO()
        writer.set_structure(scratch)
        writer.save(temp_pdb)

        CalculateExchangeRates(model, Temperature, pH, ReferenceData)

        burial = CalculateDegreesOfBurial(model, DistanceCutoff)
        hbonds = CalculateHydrogenBonds(model, temp_pdb, EnergyCutoff, PathToDSSP)
        factors = CalculateProtectionsFactors(burial, hbonds, Betac, Betah)
        AssignProtectionFactors(model, factors)

        os.remove(temp_pdb)

    PrintBreak()
    return
def process_pdb(pdb_file):
    """Repair ``pdb_file`` with PDBFixer and re-save it through ``WaterSelect``.

    The output path mirrors the input path with ``args.input`` replaced by
    ``args.output``. Missing residues are located, missing atoms and
    hydrogens (pH 7.4) are added, the result is written, then re-read with
    Biopython and saved again filtered by ``WaterSelect``.
    """
    relative_part = str(pdb_file).replace(args.input, "")
    out_file = args.output + relative_part
    out_dir = out_file.replace("/com.pdb", "")
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    with open(pdb_file) as source:
        fixer = pdbfixer.PDBFixer(pdbfile=source)
        fixer.findMissingResidues()
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.4)

    with open(out_file, 'w') as handle:
        simtk.openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions, handle)

    # Second pass: read the fixed file back and apply the selection filter.
    reader = PDBParser(QUIET=True, structure_builder=SloppyStructureBuilder())
    structure = reader.get_structure('', out_file)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(out_file, WaterSelect())
def write_specific_residue(blast,operon,operon_list):
    """Write, per gene, the residue range of its matching PDB chain.

    A directory ``operon_dir + operon_name`` is created for each operon.
    For each gene, ``blast[gene]`` supplies ``"<pdb>:<chain>"``, a start and
    a stop residue number; residues in ``start..stop`` of that chain are
    saved to ``<operon dir>/<gene>.pdb``. Genes whose structure cannot be
    processed are skipped silently.
    """
    for operon_name in operon_list:
        target_dir = operon_dir+operon_name
        os.mkdir(target_dir)

        for gene in operon[operon_name]:
            hit = blast[gene]
            pdb_info = hit[0].split(':')
            start = hit[1]
            stop = hit[2]
            filename = pdb_info[0].lower()
            chain_info = pdb_info[1]

            parser = PDBParser()
            writer = PDBIO()
            try:
                structure = parser.get_structure(filename,pdb_dir+filename+'.pdb')
                writer.set_structure(structure[0][chain_info])

                class ResiSelect(Select):
                    """Accept residues numbered within start..stop inclusive."""

                    def accept_residue(self, residue):
                        return residue.get_id()[1] in range(start,stop+1)

                writer.save(target_dir+'/'+gene+'.pdb', ResiSelect())
            except:
                # Any failure for this gene is ignored and the loop moves on.
                continue
def align_structures(alignment, structure, output_path):
    """Apply ``alignment`` to ``structure``, save it and return the structure."""
    alignment.apply(structure)

    writer = PDBIO()
    writer.set_structure(structure)
    print("Saving to {}".format(output_path))
    destination = str(output_path)
    writer.save(destination)
    return structure
def CIF2PDB(ciffile, pdbfile, verbose=False):
    """Convert the mmCIF file ``ciffile`` to PDB format at ``pdbfile``.

    Chains are renamed through ``rename_chains`` first; if that raises
    ``OutOfChainsError`` an error is logged and the process exits with
    status 1. With ``verbose`` every actual rename is logged. Returns
    ``pdbfile``.
    """
    # The parser needs a structure id: the first four characters of the
    # file name, or a placeholder when the name is too short.
    if len(ciffile) > 4:
        strucid = ciffile[:4]
    else:
        strucid = "1xxx"

    structure = MMCIFParser().get_structure(strucid, ciffile)

    try:
        chainmap = rename_chains(structure)
    except OutOfChainsError:
        logging.error("Too many chains to represent in PDB format")
        sys.exit(1)

    if verbose:
        renamed = [(new, old) for new, old in chainmap.items() if new != old]
        for new, old in renamed:
            logging.info("Renaming chain {0} to {1}".format(old, new))

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdbfile)
    return pdbfile
def extract_chain(structure: Structure, chain_ids: list, output_file: str):
    """Write ``structure`` or a subset of its chains to ``output_file``.

    The structure is also stored on ``ChainExtractor.structure``. An empty
    ``chain_ids`` saves the complete structure; otherwise only the first
    model, restricted to the chains named in ``chain_ids``, is written.
    """
    ChainExtractor.structure = structure

    # An empty list means: store everything.
    if len(chain_ids) == 0:
        writer = PDBIO()
        writer.set_structure(structure)
        writer.save(output_file)
        return

    # Selector that stores only the requested chains.
    class ChainSelect(Select):
        """Accept the first model and, inside it, the requested chains."""

        def accept_model(self, model):
            return 1 if model == ChainExtractor.structure[0] else 0

        def accept_chain(self, chain):
            first_model = ChainExtractor.structure[0]
            wanted = [first_model[chain_id] for chain_id in chain_ids]
            return 1 if chain in wanted else 0

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_file, ChainSelect())
def modified_residues_json(structure,modified_ensemble):
    """Rename modified residues in a PDB file and overwrite the file.

    Args:
        structure: path of the PDB file; it is parsed and then overwritten.
        modified_ensemble: list of ``[old_name, new_name, residue_number]``
            entries. It is extended in place with
            ``[resname, "ALA", residue_number]`` for every non-standard
            residue whose number is not already listed.

    Every residue whose number and current name match an entry is renamed
    to that entry's new name.
    """
    standard_residue_list = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU",
                             "GLY", "HIS", "ILE", "LEU", "LYS", "MET", "PHE",
                             "PRO", "SER", "THR", "TRP", "TYR", "VAL"]
    io = PDBIO()
    new_structure = PDBParser().get_structure(" ", structure)

    # Residue numbers the caller already supplied a replacement for.
    res_modified_informed = [entry[2] for entry in modified_ensemble]

    # The residue name is taken from the residue itself. Looking it up by
    # "number - 1" in a flat list is only right when numbering starts at 1
    # and the file holds a single gap-free chain.
    for residue in new_structure.get_residues():
        if residue.resname in standard_residue_list:
            continue
        number = residue.id[1]
        if number not in res_modified_informed:
            modified_ensemble.append([residue.resname, "ALA", number])

    for residue in new_structure.get_residues():
        for entry in modified_ensemble:
            if residue.id[1] == entry[2] and residue.get_resname() == entry[0]:
                residue.resname = entry[1]

    io.set_structure(new_structure)
    io.save(structure)
def SplitChain(PDB_objects):
    """
    Splits a list of PDB files by chain creating one PDB and one FASTA file per chain.

    Arguments:

    PDB_objects: list of PDB objects (with many chains) generated by the PDB parser.

    Returns the list of ``<structure id>_<chain id>`` prefixes of the files
    written. A chain id that repeats within one structure is only written
    once.
    """
    File_prefix = []

    for pdb in PDB_objects:
        chain_names = set()
        io = PDBIO()
        peptide_builder = PPBuilder()

        for chain in pdb.get_chains():
            chain_id = chain.get_id()
            if chain_id in chain_names:
                continue
            prefix = pdb.get_id() + "_" + chain_id

            # Creates a PDB file for each chain of the original file.
            io.set_structure(chain)
            io.save(prefix + ".pdb")
            File_prefix.append(prefix)

            # Creates a FASTA file for each chain of the original file.
            # Peptides are built from this chain only (not from the whole
            # structure) and their fragments are joined, so the file holds
            # this chain's sequence.
            fragments = [str(pp.get_sequence())
                         for pp in peptide_builder.build_peptides(chain)]
            if fragments:
                with open(prefix + ".fa", "w") as fasta:
                    fasta.write(">" + prefix + "\n")
                    fasta.write("".join(fragments))

            chain_names.add(chain_id)

    return File_prefix
def get(self, request, *args, **kwargs):
    """Return the generic-numbered structure as a PDB download.

    ``'custom'`` redirects to the selection page. ``'full'`` writes the whole
    first model of the stored structure; ``'substr'`` writes it filtered by
    ``GenericNumbersSelector`` built from the session's selection.
    """
    mode = self.kwargs['substructure']
    if mode == 'custom':
        return HttpResponseRedirect('/structure/generic_numbering_selection')

    selection = Selection()
    stored_selection = self.request.session.get('selection', False)
    if stored_selection:
        selection.importer(stored_selection)

    out_stream = StringIO()
    writer = PDBIO()
    # Rewind the stored file before parsing it again.
    request.session['gn_outfile'].seek(0)
    parser = PDBParser(PERMISSIVE=True, QUIET=True)
    gn_struct = parser.get_structure(request.session['gn_outfname'],
                                     request.session['gn_outfile'])[0]

    if mode == 'full':
        writer.set_structure(gn_struct)
        writer.save(out_stream)
    elif mode == 'substr':
        writer.set_structure(gn_struct)
        writer.save(out_stream, GenericNumbersSelector(parsed_selection=SelectionParser(selection)))

    root = os.path.splitext(request.session['gn_outfname'])[0]
    response = HttpResponse(content_type="chemical/x-pdb")
    response['Content-Disposition'] = 'attachment; filename="{}_GPCRDB.pdb"'.format(root)
    response.write(out_stream.getvalue())
    return response
def write( structure, name=None ):
    """
    Save ``structure`` in PDB format without overwriting existing files.

    The base name is ``name`` when given, otherwise ``structure.id``. If
    ``<base>.pdb`` exists, ``<base>_0.pdb``, ``<base>_1.pdb``, ... are tried
    until an unused name is found. Returns the file name written.
    """
    from Bio.PDB import PDBIO

    writer = PDBIO()
    writer.set_structure(structure)

    s_name = name if name else structure.id
    candidate = "%s.pdb" %s_name
    seed = 0
    while os.path.exists(candidate):
        candidate = "%s_%s.pdb" %(s_name, seed)
        seed += 1

    writer.save(candidate)
    return candidate
def save(self, output_file='converted.pdb'):
    """Write the structure held in ``self.st`` to ``output_file`` as PDB."""
    writer = PDBIO()
    writer.set_structure(self.st)
    writer.save(output_file)
def _align(self):
    """Renumber both proteins from 1000 and dump each as a single chain.

    The residues returned by ``self._pp`` for protein A and protein B get
    consecutive numbers starting at 1000, are placed into new blank-id
    chains and written to ``.tmp.protein_a.pdb`` and ``.tmp.protein_b.pdb``.
    """
    pp_a = self._pp(self.protein_A, 'A')
    pp_b = self._pp(self.protein_B, ' ')
    msa = self.alignment  # read here but not used further in this method

    plus = 1000
    for offset, residue in enumerate(pp_a):
        residue.id = (residue.id[0], plus + offset, residue.id[2])
    for offset, residue in enumerate(pp_b):
        residue.id = (residue.id[0], plus + offset, residue.id[2])

    new_chain_a = Chain(' ')
    for residue in pp_a:
        new_chain_a.add(residue)
    new_chain_b = Chain(' ')
    for residue in pp_b:
        new_chain_b.add(residue)

    writer_a = PDBIO()
    writer_a.set_structure(new_chain_a)
    writer_a.save('.tmp.protein_a.pdb')

    writer_b = PDBIO()
    writer_b.set_structure(new_chain_b)
    writer_b.save('.tmp.protein_b.pdb')
def save_contacts(structure, chains, out_file):
    """Write the given chains of ``structure`` without TER/HETATM records.

    The selected chains are first saved to a scratch file
    ``TMP<random number>.pdb`` in the working directory; its lines are then
    copied to ``out_file``, dropping every line that starts with ``TER`` or
    ``HETATM``. The scratch file is always removed.
    """
    Select = Bio.PDB.Select

    class ConstrSelect(Select):
        """Accept only chains whose id is listed in ``chains``."""

        def accept_chain(self, chain):
            return 1 if chain.id in chains else 0

    w = PDBIO()
    w.set_structure(structure)
    tmp_name = "TMP" + str(random.randint(0, 9999999)) + ".pdb"
    try:
        w.save(tmp_name, ConstrSelect())
        # Both handles are closed even if copying fails part-way.
        with open(tmp_name, 'r') as f_tmp, open(out_file, 'w') as f_out:
            for line in f_tmp:
                if line[0:3] != "TER" and line[0:6] != "HETATM":
                    f_out.write(line)
    finally:
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
def run(self):
    """Run DSSP on ``self.struct`` and emit the result via ``self.finished``.

    The structure is written to a temporary file and handed to ``DSSP``.
    For every key belonging to the first chain reported, the third DSSP
    field is stored under the first field plus ``self.offset``. On success
    ``[result, self.seq, self.node]`` is emitted; on any error the traceback
    is printed and ``[None, None, None]`` is emitted. Nothing is emitted if
    the temporary file cannot be opened.
    """
    scratch = QTemporaryFile()
    if not scratch.open():
        return

    writer = PDBIO()
    writer.set_structure(self.struct)
    writer.save(scratch.fileName())

    result = {}
    try:
        dssp = DSSP(self.struct[0], scratch.fileName(), dssp='mkdssp')
        first_chain = next(iter(dssp.keys()))[0]
        for key in dssp.keys():
            if key[0] != first_chain:
                continue
            entry = dssp[key]
            result[entry[0] + self.offset] = dssp[key][2]
        self.finished.emit([result, self.seq, self.node])
    except:
        traceback.print_exc()
        print("SORRY, DSSP WAS NOT FOUND")
        self.finished.emit([None, None, None])
def get_structure_string(self):
    """Return ``self._entity`` serialised as PDB-format text."""
    from Bio.PDB import PDBIO
    from io import StringIO

    buffer = StringIO()
    writer = PDBIO()
    writer.set_structure(self._entity)
    writer.save(buffer)
    return buffer.getvalue()
def write_pdb(self, structure, filename):
    """
    Write ``structure``, with its changed coordinates, to ``filename``.

    The writer is created as ``PDBIO(1)``. The output file is closed before
    returning so that all data reaches disk.
    """
    io = PDBIO(1)
    io.set_structure(structure)
    # The handle used to be left open; the context manager closes it.
    with open(filename, "w") as fp:
        io.save(fp)
def execute_freesasa(structure, selection=None):
    """
    Runs the freesasa executable on a PDB file.

    You can get the executable from:
        https://github.com/mittinatten/freesasa

    The binding affinity models are calibrated with the parameter
    set for vdW radii used in NACCESS:
        http://www.ncbi.nlm.nih.gov/pubmed/994183

    The structure (restricted to the chains in ``selection`` when given) is
    rewritten to a temporary PDB file, freesasa is run on it, and its
    B-value output file is parsed with ``parse_freesasa_output``.

    Returns:
        The ``(asa, rsa)`` pair produced by ``parse_freesasa_output``.

    Raises:
        IOError: if the freesasa binary or the radii file is missing.
        Exception: carrying freesasa's stderr if it exits with a non-zero status.
    """
    io = PDBIO()
    freesasa, param_f = FREESASA_BIN, FREESASA_PAR
    if not os.path.isfile(freesasa):
        raise IOError('[!] freesasa binary not found at `{0}`'.format(freesasa))
    if not os.path.isfile(param_f):
        raise IOError('[!] Atomic radii file not found at `{0}`'.format(param_f))

    # Rewrite PDB using Biopython to have a proper format
    # freesasa is very picky with line width (80 characters or fails!)
    class ChainSelector(Select):
        """Selector class to filter for specific chains"""

        def accept_chain(self, chain):
            """Accept every chain when no selection is given, else only listed ones."""
            return 1 if (not selection or chain.id in selection) else 0

    # Both temporary files are closed (and thereby deleted) on every exit
    # path, including the failure branch below.
    with tempfile.NamedTemporaryFile() as _pdbf, tempfile.NamedTemporaryFile() as _outf:
        io.set_structure(structure)
        io.save(_pdbf.name, ChainSelector())

        # Run freesasa; atomic asa output goes to the second temp file.
        cmd = '{0} --B-value-file={1} -c {2} {3}'.format(freesasa, _outf.name, param_f, _pdbf.name)
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()

        if p.returncode:
            print('[!] freesasa did not run successfully', file=sys.stderr)
            print(cmd, file=sys.stderr)
            raise Exception(stderr)

        # Rewind & parse results file
        _outf.seek(0)
        asa, rsa = parse_freesasa_output(_outf)

    return asa, rsa
def save_structure(self, filename="barnacle.pdb"):
    """
    Write ``self.structure`` to a PDB file.

    @param filename: Path of the PDB file to create.
    @type filename: string
    """
    writer = PDBIO()
    writer.set_structure(self.structure)
    writer.save(filename)
def pushToPDB(self, path, *keys):
    """Save the regions named by ``keys`` to ``<path>/<name>.pdb``.

    Each key is resolved with ``self.getRegion``. If any region is empty an
    error is reported through ``self.printerr`` and 0 is returned; otherwise
    the file is written through ``self._ResSelect`` and 1 is returned.
    """
    regions = list(map(self.getRegion, keys))
    if not all(regions):
        self.printerr('pushToPDB: RESIDUE LIST IS EMPTY\n')
        return 0

    writer = PDBIO()
    writer.set_structure(self.__struct)
    destination = path + '/' +self.__name + ".pdb"
    writer.save(destination, self._ResSelect(*regions))
    return 1
def clean_pdb(pdb_file, pdb_chain, out_dir):
    """Produce cleaned copies of a PDB file and of one of its chains.

    Both the whole structure and chain ``pdb_chain`` go through the same
    steps: ``removeHetAtoms``, ``renumberResidues`` and ``removeWaters``.
    Results are ``<out_dir>/<NAME>.pdb`` and
    ``<out_dir>/chain/<NAME>_<chain>.pdb``; intermediate files are deleted.
    If the chain does not exist an error is printed and nothing is written.

    Raises:
        argparse.ArgumentTypeError: if ``pdb_file`` is not an existing file.
    """
    out_dir_chain = out_dir + '/' + 'chain'

    if not os.path.isfile(pdb_file):
        raise argparse.ArgumentTypeError("PDB file could not be found.")

    # Create output directories if they do not already exist
    for directory in (out_dir, out_dir_chain):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Upper-cased file name up to the first dot
    pdb_name = os.path.basename(pdb_file).split('.')[0].upper()

    structure = parsePDBStructure(pdb_file)
    # Make sure the chain exists before doing any work
    try:
        chain = structure[0][pdb_chain]
    except KeyError:
        print("\nERROR:\n\n\t"+pdb_name+": chain "+pdb_chain+" could not be found.\n")
        return

    # Extract chain of interest
    writer = PDBIO()
    chain_select = ChainSelect(pdb_chain)
    writer.set_structure(structure)
    chain_stem = out_dir_chain+'/'+pdb_name+'_'+pdb_chain
    pdb_chain_file = chain_stem+'_temp.pdb'
    writer.save(pdb_chain_file, chain_select)

    # Remove HetAtoms
    temp_file = out_dir + "/" + pdb_name + "_temp.pdb"
    removeHetAtoms(pdb_file, temp_file)
    temp_file_chain = chain_stem + "_temp2.pdb"
    removeHetAtoms(pdb_chain_file, temp_file_chain)

    # Renumber PDB
    structure = parsePDBStructure(temp_file)
    new_pdb, _ = renumberResidues(structure, pdb_name)
    structure_chain = parsePDBStructure(temp_file_chain)
    new_pdb_chain, _ = renumberResidues(structure_chain, pdb_name + '_' + pdb_chain)

    # Remove waters
    removeWaters(new_pdb, out_dir + "/" + pdb_name + ".pdb")
    removeWaters(new_pdb_chain, chain_stem+'.pdb')

    # Clean up temporary files
    for leftover in (pdb_chain_file, temp_file, temp_file_chain, new_pdb_chain, new_pdb):
        os.remove(leftover)
def save_superimposed_pdb(self, out_filename):
    """
    Write the superimposed structure to ``out_filename``.

    Nothing is written when the alignment is not valid.
    """
    if not self.__valid_alignment():
        return

    superimposed_pdb = self.__create_superimposed_pdb()
    writer = PDBIO()
    writer.set_structure(superimposed_pdb)
    writer.save(out_filename)
def removeDoubleAtoms():
    """Remove disordered atoms from ``pdb_name`` and save a new file.

    The structure is parsed from the module-level ``pdb_name``,
    ``remove_disordered_atoms`` is called on it and the result is written to
    ``<name>_noDouble.pdb``, where ``<name>`` is ``pdb_name`` up to its
    first dot.
    """
    base_name = pdb_name.partition('.')[0]
    reader = PDBParser()
    structure = reader.get_structure(base_name, pdb_name)
    # Header and trailer are fetched as before; their values are not used.
    reader.get_header()
    reader.get_trailer()

    structure.remove_disordered_atoms()

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(base_name+'_noDouble.pdb')
def post(self, request):
    """Assign generic numbers to an uploaded PDB file and return it as text.

    The uploaded ``pdb_file`` is decoded as UTF-8 (undecodable bytes are
    ignored), passed through ``GenericNumbering`` and serialised with
    ``PDBIO``. The length of the output is printed before it is returned.
    """
    upload = request.FILES['pdb_file']
    root, ext = os.path.splitext(upload.name)

    pdb_text = upload.file.read().decode('UTF-8',"ignore")
    numbered = GenericNumbering(StringIO(pdb_text)).assign_generic_numbers()

    out_stream = StringIO()
    writer = PDBIO()
    writer.set_structure(numbered)
    writer.save(out_stream)

    print(len(out_stream.getvalue()))
    return Response(out_stream.getvalue())
def post(self, request, *args, **kwargs):
    """Superpose fragments onto an uploaded PDB file and zip the results.

    POST fields read: ``similarity`` (``'identical'`` or anything else, the
    latter enabling ``use_similar=True``), ``representative`` (``'any'`` or
    anything else, the latter also producing a representative set) and, for
    the representative set only, ``state``.

    When fragments were superposed, a zip archive is built in memory, stored
    in ``request.session['outfile']`` and advertised through ``self.outfile``,
    ``self.success``, ``self.zip`` and ``self.message``.  Every non-routine,
    non-dunder attribute of ``self`` is then copied into the template context.
    """
    uploaded = request.FILES['pdb_file']
    pdb_text = uploaded.file.read().decode('UTF-8', 'ignore')
    frag_sp = FragmentSuperpose(StringIO(pdb_text), uploaded.name)

    # Extra keyword only for the "similar" mode, so the "identical" mode keeps
    # relying on superpose_fragments()' own default.
    if request.POST['similarity'] == 'identical':
        similarity_kwargs = {}
    else:
        similarity_kwargs = {'use_similar': True}
    any_representative = request.POST['representative'] == 'any'

    superposed_fragments_repr = []
    if not any_representative:
        # Representative set is computed first, as in the original flow
        superposed_fragments_repr = frag_sp.superpose_fragments(
            representative=True, state=request.POST['state'], **similarity_kwargs
        )
    superposed_fragments = frag_sp.superpose_fragments(**similarity_kwargs)

    if superposed_fragments == []:
        self.message = "No fragments were aligned."
    else:
        writer = PDBIO()
        out_stream = BytesIO()
        zipf = zipfile.ZipFile(out_stream, 'a')

        def _as_pdb_text(pdb_data):
            # Serialise one superposed structure to a PDB-format string
            writer.set_structure(pdb_data)
            buffer = StringIO()
            writer.save(buffer)
            return buffer

        for fragment, pdb_data in superposed_fragments:
            tmp = _as_pdb_text(pdb_data)
            if any_representative:
                archive_name = fragment.generate_filename()
            else:
                archive_name = "all_fragments//{!s}".format(fragment.generate_filename())
            zipf.writestr(archive_name, tmp.getvalue())

        if superposed_fragments_repr != []:
            for fragment, pdb_data in superposed_fragments_repr:
                tmp = _as_pdb_text(pdb_data)
                zipf.writestr(
                    "representative_fragments//{!s}".format(fragment.generate_filename()),
                    tmp.getvalue(),
                )

        zipf.close()

        if len(out_stream.getvalue()) > 0:
            request.session['outfile'] = {
                'interacting_moiety_residue_fragments.zip': out_stream,
            }
            self.outfile = 'interacting_moiety_residue_fragments.zip'
            self.success = True
            self.zip = 'zip'
            self.message = '{:n} fragments were superposed.'.format(len(superposed_fragments))

    context = super(FragmentSuperpositionResults, self).get_context_data(**kwargs)

    # Expose every plain (non-routine, non-dunder) attribute to the template
    members = inspect.getmembers(self, lambda a: not inspect.isroutine(a))
    for attr_name, attr_value in members:
        if attr_name.startswith('__') and attr_name.endswith('__'):
            continue
        context[attr_name] = attr_value

    return render(request, self.template_name, context)
def save_ligand(structure, filename):
    """Save only the ligand residues of ``structure`` to ``<filename>.pdb``.

    A residue is kept when it is contained in any of the groups held in the
    module-level ``ligands`` mapping.

    Args:
        structure: Structure handed to ``PDBIO.set_structure``.
        filename: Output path without the ``.pdb`` extension.
    """

    class LigandSelect(Bio.PDB.Select):
        """Selector accepting residues that belong to any ligand group."""

        def accept_residue(self, residue):
            # Every group must be consulted: returning inside the first loop
            # iteration would decide on the first group alone and would yield
            # None (instead of 0) when there are no groups at all.
            return int(any(residue in group for group in ligands.values()))

    io = PDBIO()
    io.set_structure(structure)
    io.save(filename + '.pdb', LigandSelect())
def post(self, request, *args, **kwargs):
    """Clean the selected structures and stash them as a zip in the session.

    POST flags (presence only): ``pref_chain`` -> ``pref``, ``water`` ->
    ``water``, ``hets`` -> ``hets``.  For every target of type ``'structure'``
    in the session selection, the cleaned PDB text is parsed, generic numbers
    are assigned, and the result is written into an in-memory zip archive.

    Session keys written when the selection has targets:
    ``substructure_mapping`` (overwritten per structure, so the last one
    wins), ``selection`` and ``cleaned_structures`` (the archive buffer).

    Finally every non-routine, non-dunder attribute of ``self`` is copied
    into the template context, which is rendered and returned.
    """
    context = super(PDBClean, self).get_context_data(**kwargs)

    self.posted = True
    pref = 'pref_chain' in request.POST
    water = 'water' in request.POST
    hets = 'hets' in request.POST

    # get simple selection from session
    simple_selection = request.session.get('selection', False)
    selection = Selection()
    if simple_selection:
        selection.importer(simple_selection)

    has_targets = selection.targets != []
    out_stream = BytesIO()
    io = PDBIO()

    # The archive is closed explicitly (via the context manager) so that its
    # central directory is written before the buffer is placed in the session;
    # previously this relied on the ZipFile being garbage-collected.
    with zipfile.ZipFile(out_stream, 'w', zipfile.ZIP_DEFLATED) as zipf:
        if has_targets:
            for selected_struct in [x for x in selection.targets if x.type == 'structure']:
                struct_name = '{}_{}.pdb'.format(
                    selected_struct.item.protein_conformation.protein.parent.entry_name,
                    selected_struct.item.pdb_code.index,
                )
                if hets:
                    lig_names = [
                        x.pdb_reference
                        for x in StructureLigandInteraction.objects.filter(
                            structure=selected_struct.item, annotated=True
                        )
                    ]
                else:
                    lig_names = None

                cleaned_pdb = selected_struct.item.get_cleaned_pdb(pref, water, lig_names)
                # Only the first model of the parsed structure is numbered
                first_model = PDBParser(QUIET=True).get_structure(
                    struct_name, StringIO(cleaned_pdb)
                )[0]
                gn_assigner = GenericNumbering(structure=first_model)

                tmp = StringIO()
                io.set_structure(gn_assigner.assign_generic_numbers())
                request.session['substructure_mapping'] = gn_assigner.get_substructure_mapping_dict()
                io.save(tmp)
                zipf.writestr(struct_name, tmp.getvalue())
                del gn_assigner, tmp

    if has_targets:
        # NOTE(review): selection.remove() is called while iterating
        # selection.targets; if it mutates that list, entries may be skipped.
        # Left as in the original -- confirm against Selection.remove.
        for struct in selection.targets:
            selection.remove('targets', 'structure', struct.item.id)

        # export simple selection that can be serialized
        simple_selection = selection.exporter()

        request.session['selection'] = simple_selection
        request.session['cleaned_structures'] = out_stream

    # Expose every plain (non-routine, non-dunder) attribute to the template
    attributes = inspect.getmembers(self, lambda a: not inspect.isroutine(a))
    for name, value in attributes:
        if not (name.startswith('__') and name.endswith('__')):
            context[name] = value

    return render(request, self.template_name, context)
def tostring(self):
    """Return ``self.structure`` serialised by ``PDBIO`` as a single string."""
    writer = PDBIO()
    writer.set_structure(self.structure)

    buffer = StringIO()
    writer.save(file=buffer)

    # Grab the whole buffer before releasing it
    contents = buffer.getvalue()
    buffer.close()
    return contents
def renumberResidues( structure, new_pdb ):
    """Renumber the residues of the first model and save it to ``new_pdb``.

    Residues of ``structure[0]`` are visited chain by chain and get the id
    ``(' ', n, ' ')`` with ``n`` counting up from 1 without restarting
    between chains.  The structure is modified in place, written with
    ``PDBIO`` and returned.
    """
    residues = (residue for chain in structure[0] for residue in chain)
    for serial, residue in enumerate(residues, start=1):
        residue.id = (' ', serial, ' ')

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(new_pdb)
    return structure
def renumberResidues( structure ):
    """Renumber the first model and save it as ``Renumbered_Structure.pdb``.

    Residues of ``structure[0]`` are visited chain by chain and get the id
    ``(' ', n, ' ')`` with ``n`` counting up from 1 without restarting
    between chains.

    Returns:
        A ``(path, structure)`` tuple: the fixed output file name and the
        structure, which was modified in place.
    """
    new_pdb = 'Renumbered_Structure.pdb'

    residues = (residue for chain in structure[0] for residue in chain)
    for serial, residue in enumerate(residues, start=1):
        residue.id = (' ', serial, ' ')

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(new_pdb)
    return (new_pdb, structure)
def save(self):
    """Write residues numbered 1 and 43 of ``self.struct`` to ``<name>.pdb``.

    Returns:
        A short status string naming the written file.

    Raises:
        Exception: If ``self.struct`` is falsy.
    """
    if not self.struct:
        raise Exception('self.struct was not defined! Can not save a pdb!')

    class BpSelect(Select):
        """Selector keeping only the two residues of interest."""

        def accept_residue(self, residue):
            # Second field of the residue id is its sequence number
            return int(residue.get_id()[1] in (1, 43))

    fn = self.name + '.pdb'
    writer = PDBIO()
    writer.set_structure(self.struct)
    writer.save(fn, BpSelect())
    return 'Saved to: %s ' % fn
def renumberResidues(structure, pdb_name):
    """Renumber every residue and save the result as ``<pdb_name>_clean.pdb``.

    All models and chains are walked in order; each residue gets the id
    ``(' ', n, ' ')`` with ``n`` counting up from 1 and never restarting,
    neither between chains nor between models.

    Returns:
        A ``(path, structure)`` tuple: the written file name and the
        structure, which was modified in place.
    """
    residues = (
        residue
        for model in structure
        for chain in model
        for residue in chain
    )
    for serial, residue in enumerate(residues, start=1):
        residue.id = (' ', serial, ' ')

    new_pdb = pdb_name + '_clean.pdb'
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(new_pdb)
    return (new_pdb, structure)
def test_pdbio_write_residue(self):
    """Write a single residue using PDBIO"""
    first_residue = list(self.structure.get_residues())[0]

    writer = PDBIO()
    # Only the first residue is handed to the writer, not the whole model
    writer.set_structure(first_residue)

    # mkstemp returns an open descriptor; close it so PDBIO can reopen by name
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        writer.save(path)
        reparsed = self.parser.get_structure("1a8o", path)
        residue_count = sum(1 for _ in reparsed.get_residues())
        self.assertEqual(residue_count, 1)
    finally:
        os.remove(path)
def Renumber_resid(pdb_file):
    """Renumber the residues of ``pdb_file`` and overwrite the file.

    Within each model, residues are visited chain by chain and get the id
    ``(' ', n, ' ')`` with ``n`` counting up from 1; the counter restarts for
    every model but not between chains.  Residues that already carry the
    target id are left untouched.
    """
    from Bio.PDB import PDBParser
    from Bio.PDB import PDBIO

    structure = PDBParser().get_structure('Renumbered', pdb_file)

    for model in structure:
        residues = (residue for chain in model for residue in chain)
        for serial, residue in enumerate(residues, start=1):
            wanted = (' ', serial, ' ')
            if residue.id != wanted:
                residue.id = wanted

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_file)
def download_and_get_chains():
    """Download PDB entries and write each requested chain to its own file.

    Entries and their chain ids come from ``read_rostdb_entries()``.  Every
    matching chain is saved as ``<structure id>_<chain id>.pdb`` in the
    current directory.  ``(entry, chain)`` pairs whose download, parsing or
    saving raised are collected and printed at the end.
    """
    from Bio.PDB import PDBParser, PDBIO

    failed = []
    pdbs_dict = read_rostdb_entries()
    io = PDBIO()
    pdbl = PDBList()

    for pdb_e, chains in pdbs_dict.items():
        # Fetch and parse once per entry instead of once per requested chain.
        # ``Exception`` (rather than a bare except) keeps Ctrl-C and
        # SystemExit working.
        try:
            pdbl.retrieve_pdb_file(pdb_e, pdir='./')
            pdb = PDBParser().get_structure(pdb_e, 'pdb' + pdb_e.lower() + '.ent')
        except Exception:
            # Same bookkeeping as before: every chain of the entry failed
            failed.extend((pdb_e, chain_e) for chain_e in chains)
            continue

        for chain_e in chains:
            try:
                for chain in pdb.get_chains():
                    if chain.get_id() == chain_e:
                        io.set_structure(chain)
                        io.save(pdb.get_id() + '_' + chain.get_id() + '.pdb')
            except Exception:
                failed.append((pdb_e, chain_e))

    print("failures:", failed)
def removeHetero():
    """Remove all hetero residues and save ``<name>_noHetero.pdb``.

    Reads the module-level ``pdb_name``; ``<name>`` is the part of it before
    the first dot.  A residue is dropped when the first field of its id is
    not a blank; chains left without residues are dropped as well.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    for model in structure:
        # Iterate over snapshots: detaching a child while iterating the live
        # container skips the element that follows the removed one, which
        # left consecutive hetero residues (e.g. runs of waters) in place.
        for chain in list(model):
            for residue in list(chain):
                if residue.id[0] != ' ':
                    chain.detach_child(residue.id)
            if len(chain) == 0:
                model.detach_child(chain.id)

    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct + '_noHetero.pdb')
def renameChain():
    """Interactively rename a chain and save ``<name>_rename.pdb``.

    Reads the module-level ``pdb_name``; ``<name>`` is the part of it before
    the first dot.  The old and new chain ids are asked on standard input,
    and every chain (in every model) carrying the old id is renamed.
    """
    stem, _sep, _rest = pdb_name.partition('.')

    parser = PDBParser()
    structure = parser.get_structure(stem, pdb_name)

    # Header and trailer are fetched but not used further in this function
    _header = parser.get_header()
    _trailer = parser.get_trailer()

    old_id = raw_input('What is the chain you want to rename : ')
    new_id = raw_input('What is the new name of this chain : ')

    for model in structure:
        for chain in model:
            if chain.id != old_id:
                continue
            chain.id = new_id

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(stem + '_rename.pdb')
def save(self, output_dir, verbose=True):
    """Save the motif residues of ``self.struc`` below ``output_dir``.

    ``output_dir`` and its ``structures`` and ``motifs`` sub-directories are
    created when missing (creation errors are ignored).  Only residues whose
    sequence number is in ``self.residues`` are written, to a file named
    ``self.fn`` inside ``motifs``.

    Args:
        output_dir: Root folder for the output.
        verbose: When true, print the path of the written file.

    Returns:
        The path of the written motif file.

    Raises:
        Exception: If ``self.struc`` is falsy (raised after the directories
            have been created).
    """
    folder_to_save = output_dir + os.sep  # ugly hack 'rp14/'

    # Best-effort directory creation: an OSError (e.g. "already exists")
    # is deliberately swallowed for each of the three folders.
    directory_steps = (
        (os.makedirs, folder_to_save),
        (os.mkdir, folder_to_save + 'structures'),
        (os.mkdir, folder_to_save + 'motifs'),
    )
    for make, path in directory_steps:
        try:
            make(path)
        except OSError:
            pass

    wanted_numbers = self.residues

    if not self.struc:
        raise Exception('self.struct was not defined! Can not save a pdb!')

    class BpSelect(Select):
        """Selector keeping residues whose number is in ``self.residues``."""

        def accept_residue(self, residue):
            return 1 if residue.get_id()[1] in wanted_numbers else 0

    fn = folder_to_save + 'motifs/' + os.sep + self.fn
    writer = PDBIO()
    writer.set_structure(self.struc)
    writer.save(fn, BpSelect())

    if verbose:
        print(' saved to motifs: %s ' % fn)
    return fn
def deleteResidue():
    """Interactively delete a residue and save ``<name>_noResidue.pdb``.

    Reads the module-level ``pdb_name``; ``<name>`` is the part of it before
    the first dot.  The residue number is asked on standard input, and every
    residue with that sequence number (in any chain of any model) is removed.

    Raises:
        ValueError: If the entered residue number is not an integer.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    # The prompt returns text while the second field of a residue id is
    # compared as a number below; without the conversion the comparison
    # could never succeed and nothing was ever deleted.
    rm_residue = int(raw_input('What residue you want to delete : '))

    for model in structure:
        for chain in model:
            # Snapshot the residues: detaching from the chain while iterating
            # it directly would skip the residue after each removed one.
            for residue in list(chain):
                if residue.id[1] == rm_residue:
                    chain.detach_child(residue.id)

    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct + '_noResidue.pdb')
def test_pdbio_missing_occupancy(self):
    """Write PDB file with missing occupancy"""
    from Bio import BiopythonWarning

    # catch_warnings() restores the previous filter list on exit.  The old
    # cleanup, warnings.filters.pop(), removed the *last* filter although
    # simplefilter() inserts at the *front*, so an unrelated filter was
    # dropped and the 'ignore' rule leaked into later tests.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', BiopythonWarning)

        io = PDBIO()
        structure = self.parser.get_structure("test", "PDB/occupancy.pdb")
        io.set_structure(structure)

        # mkstemp returns an open descriptor; close it so PDBIO can reopen
        # the file by name.
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            io.save(filename)
            struct2 = self.parser.get_structure("test", filename)
            atoms = struct2[0]['A'][(' ', 152, ' ')]
            self.assertIsNone(atoms['N'].get_occupancy())
        finally:
            os.remove(filename)