def deleteChain():
    """Delete one or more chains from ``pdb_name`` and save the result.

    Prompts for the number of chains to remove and for each chain ID, then
    detaches every matching chain from every model.  The structure is written
    to ``<sequence>_bound.pdb`` when the user answers ``y`` to the last
    prompt, otherwise to ``<name>_without<last chain entered>.pdb``.

    Reads the module-level ``pdb_name`` and uses ``raw_input`` (Python 2).

    Raises:
        ValueError: if the chain count typed by the user is not an integer.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)
    seq = ''
    # Pre-bound so the output name is defined even when zero chains are asked for.
    rm_chain = ''
    # int(raw_input()) rather than input(): Python 2's input() evaluates
    # whatever the user types as an expression.
    nb_chain = int(raw_input('How many chain do you want to delete : '))
    for _ in range(nb_chain):
        rm_chain = raw_input('What chain you want to delete : ')
        for model in structure:
            # Iterate over a snapshot: detach_child mutates the model's children.
            for chain in list(model):
                if chain.id == rm_chain:
                    model.detach_child(chain.id)
    pept = raw_input('Do you want to get a pdb with the sequence in its name : ')
    w = PDBIO()
    w.set_structure(structure)
    if pept == 'y':
        ppb = PPBuilder()
        for pp in ppb.build_peptides(structure):
            seq = seq + pp.get_sequence()
        seq = str(seq).lower()
        w.save(seq + '_bound.pdb')
    else:
        w.save(nameStruct + '_without' + rm_chain + '.pdb')
def RemoveLigandsOneBioUnit(biounit, ligandlist):
    """Strip listed ligands and waters from a biounit and save it in BIOSTRDIR.

    Args:
        biounit: path of the biounit file; the last ``/``-separated component
            is used as the structure id and as the output file name.
        ligandlist: iterable of dicts with the keys ``"ChainID"``,
            ``"ResNum"`` and ``"LigName"``.

    Returns:
        None.  Also returns None, without writing anything, when the file
        cannot be parsed.
    """
    p = PDBParser(PERMISSIVE=1)
    pdbname = biounit.split("/")[-1]
    try:
        models = p.get_structure(pdbname, biounit)
    except Exception:
        # Best effort: an unparsable biounit is skipped.  Exception (not a bare
        # except) so that Ctrl-C and SystemExit still propagate.
        return None
    for rligand in ligandlist:
        for model in models:
            for chain in model:
                # Snapshot of the residues: detach_child mutates the chain.
                for residue in list(chain):
                    if chain.id == rligand["ChainID"] and int(rligand["ResNum"]) == residue.id[1]:
                        chain.detach_child(residue.id)
                    elif residue.id[0] == "W":
                        # Water residues are always dropped.
                        chain.detach_child(residue.id)
                    elif len(rligand["LigName"].split()) > 1 and int(rligand["ResNum"]) <= residue.id[1]:
                        # Multi-word ligand names are delegated to LongLigand.
                        LongLigand(chain, residue, rligand)
    io = PDBIO()
    io.set_structure(models)
    filepath = os.path.join(BIOSTRDIR, models.id)
    io.save(filepath)
def selectChain(ifn, ofn, chainID='A'):
    """Copy a single chain of the PDB file ``ifn`` into the file ``ofn``.

    Args:
        ifn: input PDB path.
        ofn: output PDB path.
        chainID: id of the chain to keep (default ``'A'``).
    """

    class ChainSelector():
        """Selector passed to PDBIO.save: accepts everything except foreign chains."""

        def __init__(self, chainID=chainID):
            self.chainID = chainID

        def accept_model(self, model):
            return 1

        def accept_chain(self, chain):
            return 1 if chain.get_id() == self.chainID else 0

        def accept_residue(self, residue):
            return 1

        def accept_atom(self, atom):
            return 1

    structure = PDBParser().get_structure('x', ifn)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(ofn, ChainSelector(chainID))
def test_pdbio_select(self):
    """Write a selection of the structure using a Select subclass"""

    class CAonly(Select):
        """Accept only atoms named CA whose element is C."""

        def accept_atom(self, atom):
            is_alpha_carbon = atom.name == "CA" and atom.element == "C"
            if is_alpha_carbon:
                return 1

    writer = PDBIO()
    writer.set_structure(self.structure)
    # Round-trip through a temporary file that is always removed afterwards.
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        writer.save(path, CAonly())
        reparsed = self.parser.get_structure("1a8o", path)
        residue_count = len(list(reparsed.get_residues()))
        self.assertEqual(residue_count, 70)
    finally:
        os.remove(path)
def get(self, request, *args, **kwargs):
    """Serve the cleaned PDB structures stored in the session as a zip file.

    The branch taken depends on ``self.kwargs['substructure']``:

    * ``'select'`` - redirect to ``/structure/pdb_segment_selection``.
    * ``'full'``   - use ``request.session['cleaned_structures']`` as is.
    * ``'custom'`` - re-write every member of that archive through
      ``SubstructureSelector`` into a new in-memory zip, then drop
      ``request.session['substructure_mapping']``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.
    NOTE(review): ``out_stream`` is bound only in the 'full'/'custom' branches
    and ``response`` only when the stream is non-empty; other inputs look like
    they end in an unbound-name error - confirm.
    """
    if self.kwargs['substructure'] == 'select':
        return HttpResponseRedirect('/structure/pdb_segment_selection')
    if self.kwargs['substructure'] == 'full':
        out_stream = request.session['cleaned_structures']
    elif self.kwargs['substructure'] == 'custom':
        # Rebuild the user's selection from its serialised session form.
        simple_selection = request.session.get('selection', False)
        selection = Selection()
        if simple_selection:
            selection.importer(simple_selection)
        io = PDBIO()
        zipf_in = zipfile.ZipFile(request.session['cleaned_structures'], 'r')
        out_stream = BytesIO()
        zipf_out = zipfile.ZipFile(out_stream, 'w', zipfile.ZIP_DEFLATED)
        for name in zipf_in.namelist():
            tmp = StringIO()
            # Parse the archived PDB text and keep only its first model.
            io.set_structure(PDBParser(QUIET=True).get_structure(name, StringIO(zipf_in.read(name).decode('utf-8')))[0])
            io.save(tmp, SubstructureSelector(request.session['substructure_mapping'], parsed_selection=SelectionParser(selection)))
            zipf_out.writestr(name, tmp.getvalue())
        zipf_in.close()
        zipf_out.close()
        del request.session['substructure_mapping']
    if len(out_stream.getvalue()) > 0:
        response = HttpResponse(content_type="application/zip")
        response['Content-Disposition'] = 'attachment; filename="pdb_structures.zip"'
        response.write(out_stream.getvalue())
    return response
def test_conversion(self):
    """Parse 1LCD.cif, write it as PDB, parse again and compare"""
    import os  # local import: needed to close and remove the temp file

    cif_parser = MMCIFParser(QUIET=1)
    cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

    pdb_writer = PDBIO()
    pdb_writer.set_structure(cif_struct)
    filenumber, filename = tempfile.mkstemp()
    # mkstemp returns an open descriptor; close it and always delete the file.
    os.close(filenumber)
    try:
        pdb_writer.save(filename)
        pdb_parser = PDBParser(QUIET=1)
        pdb_struct = pdb_parser.get_structure('example_pdb', filename)
    finally:
        os.remove(filename)

    # comparisons (the parsed structure lives in memory, the file is gone)
    self.assertEqual(len(pdb_struct), len(cif_struct))

    pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
    cif_atom_names = [a.name for a in cif_struct.get_atoms()]
    self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
    self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

    pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
    cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
    self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
def execute_freesasa(structure, selection=None):
    """
    Runs the freesasa executable on a PDB file.

    You can get the executable from:
        https://github.com/mittinatten/freesasa

    The binding affinity models are calibrated with the parameter
    set for vdW radii used in NACCESS:
        http://www.ncbi.nlm.nih.gov/pubmed/994183

    Args:
        structure: Biopython entity handed to ``PDBIO.set_structure``.
        selection: optional container of chain ids to keep; when empty or
            None every chain is written.

    Returns:
        The ``(asa, rsa)`` pair produced by ``parse_freesasa_output``.

    Raises:
        IOError: if the freesasa binary or the atomic radii file is missing.
        Exception: carrying freesasa's stderr when it exits with a non-zero code.
    """
    freesasa, param_f = FREESASA_BIN, FREESASA_PAR
    if not os.path.isfile(freesasa):
        raise IOError('[!] freesasa binary not found at `{0}`'.format(freesasa))
    if not os.path.isfile(param_f):
        raise IOError('[!] Atomic radii file not found at `{0}`'.format(param_f))

    class ChainSelector(Select):
        """Selector class to filter for specific chains"""

        def accept_chain(self, chain):
            """Returns 1 for chains within the selection (or when there is none)"""
            if not selection or chain.id in selection:
                return 1
            return 0

    io = PDBIO()
    io.set_structure(structure)

    # Context managers so both temp files are removed even if freesasa fails.
    with tempfile.NamedTemporaryFile() as _pdbf, tempfile.NamedTemporaryFile() as _outf:
        # Rewrite PDB using Biopython to have a proper format;
        # freesasa is very picky with line width (80 characters or fails!)
        io.save(_pdbf.name, ChainSelector())

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed through verbatim.
        cmd = [freesasa, '--B-value-file={0}'.format(_outf.name), '-c', param_f, _pdbf.name]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _stdout, stderr = p.communicate()

        if p.returncode:
            print('[!] freesasa did not run successfully', file=sys.stderr)
            print(' '.join(cmd), file=sys.stderr)
            raise Exception(stderr)

        # Rewind & parse the per-atom results written by freesasa.
        _outf.seek(0)
        asa, rsa = parse_freesasa_output(_outf)

    return asa, rsa
def pushToPDB(self, path, *keys):
    """Write the residues of the requested regions to ``<path>/<name>.pdb``.

    Each key is resolved with ``self.getRegion``.  Returns 1 after saving and
    0 (after reporting through ``self.printerr``) when any region is empty.
    """
    regions = list(map(self.getRegion, keys))
    if all(regions):
        writer = PDBIO()
        writer.set_structure(self.__struct)
        target = path + '/' + self.__name + ".pdb"
        writer.save(target, self._ResSelect(*regions))
        return 1
    self.printerr('pushToPDB: RESIDUE LIST IS EMPTY\n')
    return 0
def save_structure(self, filename="barnacle.pdb"):
    """
    Write the atomic coordinates of the sampled structure to a PDB file.

    @param filename: Name of the PDB file to write.
    @type filename: string
    """
    writer = PDBIO()
    writer.set_structure(self.structure)
    writer.save(filename)
def clean_pdb(pdb_file, pdb_chain, out_dir):
    """Produce cleaned copies of a PDB file and of one of its chains.

    The full structure ends up in ``<out_dir>/<NAME>.pdb`` and the chain in
    ``<out_dir>/chain/<NAME>_<chain>.pdb``; both go through removeHetAtoms,
    renumberResidues and removeWaters.  Intermediate files are deleted.

    Prints an error and returns None when the chain is not in the first model.

    Raises:
        argparse.ArgumentTypeError: if ``pdb_file`` does not exist.
    """
    out_dir_chain = out_dir + '/' + 'chain'
    if not os.path.isfile(pdb_file):
        raise argparse.ArgumentTypeError("PDB file could not be found.")

    # Make sure both output directories exist.
    for directory in (out_dir, out_dir_chain):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Upper-cased file stem is used as the structure name.
    pdb_name = os.path.basename(pdb_file).split('.')[0].upper()

    structure = parsePDBStructure(pdb_file)
    # The lookup only verifies that the requested chain exists.
    try:
        structure[0][pdb_chain]
    except KeyError:
        print("\nERROR:\n\n\t"+pdb_name+": chain "+pdb_chain+" could not be found.\n")
        return

    # Write the chain of interest to a temporary file.
    writer = PDBIO()
    selector = ChainSelect(pdb_chain)
    writer.set_structure(structure)
    chain_stem = pdb_name + '_' + pdb_chain
    pdb_chain_file = out_dir_chain + '/' + chain_stem + '_temp.pdb'
    writer.save(pdb_chain_file, selector)

    # Remove HetAtoms from the full structure and from the chain.
    temp_file = out_dir + "/" + pdb_name + "_temp.pdb"
    removeHetAtoms(pdb_file, temp_file)
    temp_file_chain = out_dir_chain + "/" + chain_stem + "_temp2.pdb"
    removeHetAtoms(pdb_chain_file, temp_file_chain)

    # Renumber both.
    new_pdb, _ = renumberResidues(parsePDBStructure(temp_file), pdb_name)
    new_pdb_chain, _ = renumberResidues(parsePDBStructure(temp_file_chain), chain_stem)

    # Remove waters, producing the final outputs.
    removeWaters(new_pdb, out_dir + "/" + pdb_name + ".pdb")
    removeWaters(new_pdb_chain, out_dir_chain + '/' + chain_stem + '.pdb')

    # Clean up temporary files.
    for leftover in (pdb_chain_file, temp_file, temp_file_chain, new_pdb_chain, new_pdb):
        os.remove(leftover)
def get(self, request, *args, **kwargs):
    """Return the generic-numbered structure from the session as a PDB download.

    ``self.kwargs['substructure']`` selects the behaviour: ``'custom'``
    redirects to the selection page, ``'full'`` writes the whole first model
    and ``'substr'`` writes only what ``GenericNumbersSelector`` accepts.
    """
    mode = self.kwargs['substructure']
    if mode == 'custom':
        return HttpResponseRedirect('/structure/generic_numbering_selection')

    # Restore the user's selection from its serialised session form.
    simple_selection = self.request.session.get('selection', False)
    selection = Selection()
    if simple_selection:
        selection.importer(simple_selection)

    out_stream = StringIO()
    io = PDBIO()
    session = request.session
    session['gn_outfile'].seek(0)
    parser = PDBParser(PERMISSIVE=True, QUIET=True)
    gn_struct = parser.get_structure(session['gn_outfname'], session['gn_outfile'])[0]

    if mode == 'full':
        io.set_structure(gn_struct)
        io.save(out_stream)
    elif mode == 'substr':
        io.set_structure(gn_struct)
        io.save(out_stream, GenericNumbersSelector(parsed_selection=SelectionParser(selection)))

    root, ext = os.path.splitext(session['gn_outfname'])
    response = HttpResponse(content_type="chemical/x-pdb")
    response['Content-Disposition'] = 'attachment; filename="{}_GPCRDB.pdb"'.format(root)
    response.write(out_stream.getvalue())
    return response
def save_superimposed_pdb(self, out_filename):
    """
    Write the superimposed PDB to ``out_filename``.

    Nothing is written when the alignment is not valid.
    """
    if not self.__valid_alignment():
        return
    superimposed_pdb = self.__create_superimposed_pdb()
    writer = PDBIO()
    writer.set_structure(superimposed_pdb)
    writer.save(out_filename)
def removeDoubleAtoms():
    """Remove the disordered ("double") atoms of ``pdb_name``.

    The result is written to ``<name>_noDouble.pdb``, where ``<name>`` is the
    part of the module-level ``pdb_name`` before its first dot.
    """
    pdb_parser = PDBParser()
    stem = pdb_name.partition('.')[0]
    structure = pdb_parser.get_structure(stem, pdb_name)
    # Header and trailer are still requested, as before, but not used.
    pdb_parser.get_header()
    pdb_parser.get_trailer()
    structure.remove_disordered_atoms()
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(stem + '_noDouble.pdb')
def post(self, request):
    """Assign generic numbers to the uploaded ``pdb_file`` and return the PDB text."""
    upload = request.FILES['pdb_file']
    root, ext = os.path.splitext(upload.name)
    # Undecodable bytes in the upload are dropped ("ignore").
    pdb_text = upload.file.read().decode('UTF-8', "ignore")
    generic_numbering = GenericNumbering(StringIO(pdb_text))
    numbered = generic_numbering.assign_generic_numbers()
    out_stream = StringIO()
    writer = PDBIO()
    writer.set_structure(numbered)
    writer.save(out_stream)
    print(len(out_stream.getvalue()))
    return Response(out_stream.getvalue())
def save_ligand(structure, filename):
    """Save the ligand residues of ``structure`` to ``<filename>.pdb``.

    A residue is kept when it is contained in any of the groups of the
    module-level ``ligands`` mapping.

    Args:
        structure: Biopython entity handed to ``PDBIO.set_structure``.
        filename: output path without the ``.pdb`` extension.
    """
    Select = Bio.PDB.Select

    class LigandSelect(Select):
        def accept_residue(self, residue):
            # Every group is consulted; a residue missing from the first
            # group must not be rejected before the others are checked.
            if any(residue in group for group in ligands.values()):
                return 1
            return 0

    io = PDBIO()
    io.set_structure(structure)
    io.save(filename + '.pdb', LigandSelect())
def post(self, request, *args, **kwargs):
    """Build a zip of cleaned, generic-numbered PDB files for the selected structures.

    Form flags read from ``request.POST``: ``pref_chain``, ``water``, ``hets``.
    The zip stream is stored in ``request.session['cleaned_structures']`` and
    the (emptied) selection in ``request.session['selection']``; the view then
    renders ``self.template_name`` with the instance's non-routine attributes.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.
    NOTE(review): no explicit ``zipf.close()`` is visible in this block;
    presumably the archive is finalised when ``zipf`` is garbage-collected -
    confirm.
    """
    context = super(PDBClean, self).get_context_data(**kwargs)
    self.posted = True
    pref = True
    water = False
    hets = False
    if 'pref_chain' not in request.POST.keys():
        pref = False
    if 'water' in request.POST.keys():
        water = True
    if 'hets' in request.POST.keys():
        hets = True
    # get simple selection from session
    simple_selection = request.session.get('selection', False)
    selection = Selection()
    if simple_selection:
        selection.importer(simple_selection)
    out_stream = BytesIO()
    io = PDBIO()
    zipf = zipfile.ZipFile(out_stream, 'w', zipfile.ZIP_DEFLATED)
    if selection.targets != []:
        for selected_struct in [x for x in selection.targets if x.type == 'structure']:
            struct_name = '{}_{}.pdb'.format(selected_struct.item.protein_conformation.protein.parent.entry_name, selected_struct.item.pdb_code.index)
            # Annotated ligand names are only looked up when 'hets' was ticked.
            if hets:
                lig_names = [x.pdb_reference for x in StructureLigandInteraction.objects.filter(structure=selected_struct.item, annotated=True)]
            else:
                lig_names = None
            gn_assigner = GenericNumbering(structure=PDBParser(QUIET=True).get_structure(struct_name, StringIO(selected_struct.item.get_cleaned_pdb(pref, water, lig_names)))[0])
            tmp = StringIO()
            io.set_structure(gn_assigner.assign_generic_numbers())
            # Overwritten on every iteration: the mapping of the last structure wins.
            request.session['substructure_mapping'] = gn_assigner.get_substructure_mapping_dict()
            io.save(tmp)
            zipf.writestr(struct_name, tmp.getvalue())
            del gn_assigner, tmp
        for struct in selection.targets:
            selection.remove('targets', 'structure', struct.item.id)
        # export simple selection that can be serialized
        simple_selection = selection.exporter()
        request.session['selection'] = simple_selection
        request.session['cleaned_structures'] = out_stream
    # Copy every non-routine, non-dunder attribute of the view into the context.
    attributes = inspect.getmembers(self, lambda a:not(inspect.isroutine(a)))
    for a in attributes:
        if not(a[0].startswith('__') and a[0].endswith('__')):
            context[a[0]] = a[1]
    return render(request, self.template_name, context)
def tostring(self):
    """Returns the validated structure as a string"""
    buffer = StringIO()
    writer = PDBIO()
    writer.set_structure(self.structure)
    writer.save(file=buffer)
    # Grab the whole buffer before releasing it.
    contents = buffer.getvalue()
    buffer.close()
    return contents
def renumberResidues( structure, new_pdb ):
    """Renumber the residues of the first model from 1 and save the structure.

    Numbering runs continuously across chains; every residue id becomes
    ``(' ', n, ' ')``.  The structure is written to ``new_pdb`` and returned.
    """
    first_model = structure[0]
    residues = (residue for chain in first_model for residue in chain)
    for number, residue in enumerate(residues, start=1):
        residue.id = (' ', number, ' ')
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(new_pdb)
    return structure
def renumberResidues( structure ):
    """Renumber the first model's residues from 1 and write them to disk.

    Numbering continues across chains.  The output always goes to
    ``Renumbered_Structure.pdb``.

    Returns:
        Tuple ``(output file name, structure)``.
    """
    counter = 0
    for chain in structure[0]:
        for residue in chain:
            counter += 1
            residue.id = (' ', counter, ' ')
    new_pdb = 'Renumbered_Structure.pdb'
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(new_pdb)
    return (new_pdb, structure)
def save(self):
    """Write residues numbered 1 and 43 of ``self.struct`` to ``<self.name>.pdb``.

    Returns a short message naming the file that was written.

    Raises:
        Exception: if ``self.struct`` is falsy.
    """
    if not self.struct:
        raise Exception('self.struct was not defined! Can not save a pdb!')

    kept_numbers = (1, 43)

    class BpSelect(Select):
        def accept_residue(self, residue):
            return 1 if residue.get_id()[1] in kept_numbers else 0

    writer = PDBIO()
    writer.set_structure(self.struct)
    fn = self.name + '.pdb'
    writer.save(fn, BpSelect())
    return 'Saved to: %s ' % fn
def renumberResidues(structure, pdb_name):
    """Renumber every residue of the structure consecutively, starting at 1.

    The counter runs on across chains and models.  The structure is saved as
    ``<pdb_name>_clean.pdb``.

    Returns:
        Tuple ``(output file name, structure)``.
    """
    all_residues = (
        residue
        for model in structure
        for chain in model
        for residue in chain
    )
    for number, residue in enumerate(all_residues, start=1):
        residue.id = (' ', number, ' ')
    new_pdb = pdb_name + '_clean.pdb'
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(new_pdb)
    return (new_pdb, structure)
def test_pdbio_write_residue(self):
    """Write a single residue using PDBIO"""
    first_residue = list(self.structure.get_residues())[0]
    writer = PDBIO()
    # Only the residue itself is handed to the writer.
    writer.set_structure(first_residue)
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        writer.save(path)
        reparsed = self.parser.get_structure("1a8o", path)
        self.assertEqual(len(list(reparsed.get_residues())), 1)
    finally:
        os.remove(path)
def post (self, request, *args, **kwargs):
    """Superpose fragments onto the uploaded PDB file and offer them as a zip.

    ``request.POST['similarity']`` ('identical' or anything else) decides
    whether ``use_similar=True`` is passed, and ``request.POST['representative']``
    ('any' or anything else) whether an extra representative set is computed
    for ``request.POST['state']``.  Results are zipped in memory and stored in
    ``request.session['outfile']``; the view then renders
    ``self.template_name`` with the instance's non-routine attributes.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.
    """
    frag_sp = FragmentSuperpose(StringIO(request.FILES['pdb_file'].file.read().decode('UTF-8', 'ignore')),request.FILES['pdb_file'].name)
    superposed_fragments = []
    superposed_fragments_repr = []
    if request.POST['similarity'] == 'identical':
        if request.POST['representative'] == 'any':
            superposed_fragments = frag_sp.superpose_fragments()
        else:
            superposed_fragments_repr = frag_sp.superpose_fragments(representative=True, state=request.POST['state'])
            superposed_fragments = frag_sp.superpose_fragments()
    else:
        if request.POST['representative'] == 'any':
            superposed_fragments = frag_sp.superpose_fragments(use_similar=True)
        else:
            superposed_fragments_repr = frag_sp.superpose_fragments(representative=True, use_similar=True, state=request.POST['state'])
            superposed_fragments = frag_sp.superpose_fragments(use_similar=True)
    if superposed_fragments == []:
        self.message = "No fragments were aligned."
    else:
        io = PDBIO()
        out_stream = BytesIO()
        zipf = zipfile.ZipFile(out_stream, 'a')
        for fragment, pdb_data in superposed_fragments:
            io.set_structure(pdb_data)
            tmp = StringIO()
            io.save(tmp)
            # With a representative set, the full set goes into its own folder.
            if request.POST['representative'] == 'any':
                zipf.writestr(fragment.generate_filename(), tmp.getvalue())
            else:
                zipf.writestr("all_fragments//{!s}".format(fragment.generate_filename()), tmp.getvalue())
        if superposed_fragments_repr != []:
            for fragment, pdb_data in superposed_fragments_repr:
                io.set_structure(pdb_data)
                tmp = StringIO()
                io.save(tmp)
                zipf.writestr("representative_fragments//{!s}".format(fragment.generate_filename()), tmp.getvalue())
        zipf.close()
        if len(out_stream.getvalue()) > 0:
            request.session['outfile'] = { 'interacting_moiety_residue_fragments.zip' : out_stream, }
            self.outfile = 'interacting_moiety_residue_fragments.zip'
            self.success = True
            self.zip = 'zip'
            self.message = '{:n} fragments were superposed.'.format(len(superposed_fragments))
    context = super(FragmentSuperpositionResults, self).get_context_data(**kwargs)
    # Copy every non-routine, non-dunder attribute of the view into the context.
    attributes = inspect.getmembers(self, lambda a:not(inspect.isroutine(a)))
    for a in attributes:
        if not(a[0].startswith('__') and a[0].endswith('__')):
            context[a[0]] = a[1]
    return render(request, self.template_name, context)
def Renumber_resid(pdb_file):
    """Renumber the residues of ``pdb_file`` in place.

    Within each model the residues are numbered from 1, continuing across
    chains, and the file is overwritten with the result.
    """
    from Bio.PDB import PDBParser
    from Bio.PDB import PDBIO

    structure = PDBParser().get_structure('Renumbered', pdb_file)
    for model in structure:
        residues = (residue for chain in model for residue in chain)
        for number, residue in enumerate(residues, start=1):
            wanted = (' ', number, ' ')
            # Ids that are already correct are left untouched.
            if residue.id != wanted:
                residue.id = wanted
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_file)
def renameChain():
    """Interactively rename one chain of ``pdb_name``.

    Asks for the current and the new chain id, renames the matching chain in
    every model and writes ``<name>_rename.pdb``.  Uses ``raw_input``
    (Python 2) and the module-level ``pdb_name``.
    """
    pdb_parser = PDBParser()
    stem = pdb_name.partition('.')[0]
    structure = pdb_parser.get_structure(stem, pdb_name)
    # Header and trailer are still requested, as before, but not used.
    pdb_parser.get_header()
    pdb_parser.get_trailer()
    old_id = raw_input('What is the chain you want to rename : ')
    new_id = raw_input('What is the new name of this chain : ')
    for model in structure:
        for chain in model:
            if chain.id != old_id:
                continue
            chain.id = new_id
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(stem + '_rename.pdb')
def download_and_get_chains():
    """Download the PDB entries listed by ``read_rostdb_entries`` and split out chains.

    For every ``(entry, chain)`` pair the entry is fetched into the current
    directory, parsed from ``pdb<entry>.ent`` and the requested chain is saved
    as ``<entry>_<chain>.pdb``.  Pairs that raise are collected and printed at
    the end instead of aborting the batch.
    """
    from Bio.PDB import PDBParser, PDBIO

    failed = []
    pdbs_dict = read_rostdb_entries()
    io = PDBIO()
    pdbl = PDBList()
    for pdb_e, chains in pdbs_dict.items():
        for chain_e in chains:
            try:
                pdbl.retrieve_pdb_file(pdb_e, pdir='./')
                pdb = PDBParser().get_structure(pdb_e, 'pdb' + pdb_e.lower() + '.ent')
                for chain in pdb.get_chains():
                    if chain.get_id() == chain_e:
                        io.set_structure(chain)
                        io.save(pdb.get_id() + '_' + chain.get_id() + '.pdb')
            except Exception:
                # Exception (not a bare except) keeps the batch interruptible:
                # KeyboardInterrupt and SystemExit still propagate.
                failed.append((pdb_e, chain_e))
    print("failures:", failed)
def removeHetero():
    """Remove all hetero residues from ``pdb_name`` and save ``<name>_noHetero.pdb``.

    A residue counts as hetero when the first field of its id is not a blank.
    Chains left without residues are removed as well.  Reads the module-level
    ``pdb_name``.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)
    for model in structure:
        # Snapshots are iterated because detach_child mutates the container;
        # looping over the live entity skips the element after each removal.
        for chain in list(model):
            for residue in list(chain):
                if residue.id[0] != ' ':
                    chain.detach_child(residue.id)
            if len(chain) == 0:
                model.detach_child(chain.id)
    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct + '_noHetero.pdb')
def test_pdbio_missing_occupancy(self):
    """Write PDB file with missing occupancy"""
    from Bio import BiopythonWarning

    io = PDBIO()
    # catch_warnings restores the filter list on exit.  The previous
    # simplefilter()/filters.pop() pairing was wrong: simplefilter inserts at
    # the front of warnings.filters while pop() removes the last entry.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', BiopythonWarning)
        structure = self.parser.get_structure("test", "PDB/occupancy.pdb")
        io.set_structure(structure)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            io.save(filename)
            struct2 = self.parser.get_structure("test", filename)
            atoms = struct2[0]['A'][(' ', 152, ' ')]
            self.assertIsNone(atoms['N'].get_occupancy())
        finally:
            os.remove(filename)
def deleteResidue():
    """Delete the residues with a given number and save ``<name>_noResidue.pdb``.

    Prompts for the residue sequence number and detaches every residue with
    that number from every chain of every model.  Uses ``raw_input``
    (Python 2) and the module-level ``pdb_name``.

    Raises:
        ValueError: if the typed residue number is not an integer.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)
    # raw_input returns text; residue.id[1] is an int, so the answer has to
    # be converted or the comparison below can never match.
    rm_residue = int(raw_input('What residue you want to delete : '))
    for model in structure:
        for chain in model:
            # Snapshot: detach_child mutates the chain being walked.
            for residue in list(chain):
                if residue.id[1] == rm_residue:
                    chain.detach_child(residue.id)
    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct + '_noResidue.pdb')
def save(self, output_dir, verbose=True):
    """Save the full structure and its motif residues under ``output_dir``.

    The whole structure goes to ``structures/<self.fn>`` and the residues
    whose number is in ``self.residues`` go to ``motifs/<self.fn>``.
    Directories are created on demand; OSError from creating them is ignored.

    Raises:
        Exception: if ``self.struc`` is falsy.
    """
    folder_to_save = output_dir + os.sep  # ugly hack 'rp14/'

    # Create the output tree; OSError (e.g. already existing) is ignored.
    steps = (
        (os.makedirs, folder_to_save),
        (os.mkdir, folder_to_save + 'structures'),
        (os.mkdir, folder_to_save + 'motifs'),
    )
    for make, directory in steps:
        try:
            make(directory)
        except OSError:
            pass

    RESI = self.residues
    if not self.struc:
        raise Exception('self.struct was not defined! Can not save a pdb!')

    class BpSelect(Select):
        def accept_residue(self, residue):
            return 1 if residue.get_id()[1] in RESI else 0

    # Whole structure.
    writer = PDBIO()
    writer.set_structure(self.struc)
    fn = folder_to_save + 'structures' + os.sep + self.fn
    writer.save(fn)
    if verbose:
        print(' saved to struc: %s ' % fn)

    # Motif residues only.
    writer = PDBIO()
    writer.set_structure(self.struc)
    fn = folder_to_save + 'motifs/' + os.sep + self.fn
    writer.save(fn, BpSelect())
    if verbose:
        print(' saved to motifs: %s ' % fn)
def build_complex(file_1, file_2):
    """
    Try to add one chain of a pairwise interaction to a growing complex.

    ``file_1`` is the complex built so far (or, in the first iteration, one of
    the pairwise interactions) and ``file_2`` another pairwise interaction.
    The second structure is superimposed on the first using the atom lists
    from Compute_equal_chain; when check_clash accepts the moved chain,
    ``file_1`` is overwritten with the chains of the complex plus that chain
    and rename_complex_chains is run on it.

    @ Input - Two file paths of PDB interactions.
    @ Output - True when the chain was added and file_1 rewritten, otherwise
      False (also when the superimposition could not be set up).

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.
    NOTE(review): ``moved_chain`` is only bound when some chain id differs
    from the superimposed one - confirm that is always the case.
    """
    parser = PDBParser(PERMISSIVE=1)
    structure_1 = parser.get_structure('Complex', file_1)
    structure_2 = parser.get_structure('Complex', file_2)
    sup = Superimposer()
    io = PDBIO()
    atoms_fixed, atoms_moving = Compute_equal_chain(structure_1, structure_2)
    try:
        sup.set_atoms(atoms_fixed, atoms_moving)
    except:
        return False
    # Move every atom of the second structure onto the first one's frame.
    sup.apply(list(structure_2.get_atoms()))
    for chain in structure_2[0].get_chains():
        # Index 2 of the full id is compared with the chain id.
        if chain.id != list(atoms_moving)[0].get_full_id()[2]:
            moved_chain = chain
    if check_clash(structure_1, moved_chain):
        with open(file_1, "wt") as out_file:
            # Despite its name, ``model`` iterates over chains here.
            for model in list(structure_1.get_chains()) + [moved_chain]:
                io.set_structure(model)
                io.save(out_file)
        rename_complex_chains(file_1)
        return True
    return False
def retrieve_pdb_chain(pdbdir, MDL=0, chain_name='A', write=0, outpath=None):
    """Parse a PDB file, return one model and optionally write one chain of it.

    Args:
        pdbdir: path of the PDB file; the first four characters of its file
            name are used as the structure id.
        MDL: id of the model to return (and to write).
        chain_name: id of the chain written when ``write == 1``.
        write: when 1, save the standard residues of that chain to
            ``<outpath>/<pdbid>.pdb`` with the original atom numbering.
        outpath: output directory, created if needed; required when writing.

    Returns:
        The model ``structure[MDL]``.

    Raises:
        RuntimeError: if ``write == 1`` and ``outpath`` is None.
    """
    warnings.simplefilter('ignore', BiopythonWarning)
    pdbid = pdbdir.split('/')[-1][0:4]
    parser = PDBParser(PERMISSIVE=1)
    structure = parser.get_structure(pdbid, pdbdir)
    model = structure[MDL]
    if write == 1:
        if outpath is None:
            raise RuntimeError('out path is None!')
        os.makedirs(outpath, exist_ok=True)

        class ModelSelect(Select):
            def accept_model(self, model):
                # Write the requested model, i.e. the one that is returned,
                # rather than always model 0.
                return model.get_id() == MDL

            def accept_chain(self, chain):
                """Keep only the requested chain."""
                return chain.get_id() == chain_name

            def accept_residue(self, residue):
                # A blank hetero flag marks a standard residue.
                return residue.get_id()[0] == ' '

            def accept_atom(self, atom):
                """Keep every atom."""
                return 1

        io = PDBIO()
        io.set_structure(structure)
        io.save('%s/%s.pdb' % (outpath, pdbid), ModelSelect(), preserve_atom_numbering=True)
    return model
def test_model_numbering(self):
    """Preserve model serial numbers during I/O."""

    def confirm_numbering(struct):
        """Check that the 20 models are numbered 1..20 in order."""
        self.assertEqual(len(struct), 20)
        for idx, model in enumerate(struct):
            # assertEqual, not assertTrue: assertTrue(a, b) treats b as the
            # failure message and passes for any truthy a.
            self.assertEqual(model.serial_num, idx + 1)
            self.assertEqual(model.serial_num, model.id + 1)

    parser = PDBParser()
    struct1 = parser.get_structure("1mot", "PDB/1MOT.pdb")
    confirm_numbering(struct1)

    # Round trip: serialize and parse again
    io = PDBIO()
    io.set_structure(struct1)
    filenumber, filename = tempfile.mkstemp()
    os.close(filenumber)
    try:
        io.save(filename)
        struct2 = parser.get_structure("1mot", filename)
        confirm_numbering(struct2)
    finally:
        os.remove(filename)
def split_pdb_by_chain(pdb_id):
    """Write each chain of a PDB entry to its own file and collect the sequences.

    The entry is read from ``ent_files/pdb<id>.ent``.  Chain files are named
    ``<ID>-<model id + 1>_<chain id>.pdb`` and written under
    ``pdb_chains/<ID>/`` unless a file with that name already exists.

    Returns a dict mapping each chain file name to the sequence built from the
    peptides PPBuilder finds in that chain.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; in particular, whether the sequence is
    collected for chains whose file already existed should be confirmed.
    """
    # Only the last path component is created; "pdb_chains" must already exist.
    if not os.path.isdir("pdb_chains/" + pdb_id.upper()):
        os.mkdir("pdb_chains/" + pdb_id.upper())
    actual_pdbfile = PDBParser().get_structure(
        pdb_id, "ent_files/pdb" + pdb_id.lower() + ".ent")
    return_dict = dict()
    for model in actual_pdbfile:
        for chain in model:
            outfilename = pdb_id.upper() + "-" + str(
                model.get_id() + 1) + "_" + str(chain.get_id()) + ".pdb"
            if not os.path.isfile("pdb_chains/" + pdb_id.upper() + "/" + outfilename):
                io = PDBIO()
                io.set_structure(chain)
                io.save("pdb_chains/" + pdb_id.upper() + "/" + outfilename)
            # Concatenate the sequences of all peptides found in this chain.
            ppb = PPBuilder().build_peptides(chain)
            this_seq = Seq("", generic_protein)
            for pp in ppb:
                this_seq += pp.get_sequence()
            return_dict[outfilename] = this_seq
    return return_dict
def structure_filtered_dca_truncate_pdb(pdb_id, chain_id, start, end,
                                        pdb_directory=os.path.join(".", "pdbs")):
    """Save the ``start``-``end`` region of one chain as a separate PDB file.

    The structure comes from ``structure_filtered_dca_parse_pdb`` and the
    region filter from ``structure_filtered_dca_pfam_is_in_structure_selection``.
    The output is ``<pdb_directory>/<pdb_id><chain_id>_<start>-<end>.pdb``.
    """
    from Bio.PDB import PDBIO

    writer = PDBIO()
    structure = structure_filtered_dca_parse_pdb(pdb_id)
    selector = structure_filtered_dca_pfam_is_in_structure_selection(
        chain_id, start, end)
    writer.set_structure(structure)
    out_name = "%s%s_%d-%d.pdb" % (pdb_id, chain_id, start, end)
    writer.save(os.path.join(pdb_directory, out_name), select=selector)
def assembleChain():
    """Interactively merge two chains by giving the second the first one's id.

    Asks for two chain ids; for every residue of the second chain the parent
    chain's id is set to the first id.  The result is written to
    ``<name>_assemble.pdb``.  Uses ``raw_input`` (Python 2) and the
    module-level ``pdb_name``.
    """
    pdb_parser = PDBParser()
    stem = pdb_name.partition('.')[0]
    structure = pdb_parser.get_structure(stem, pdb_name)
    # Header and trailer are still requested, as before, but not used.
    pdb_parser.get_header()
    pdb_parser.get_trailer()
    first_id = raw_input('What is the 1st chain you want to assemble : ')
    second_id = raw_input('What is the 2nd chain you want to assemble : ')
    for model in structure:
        for chain in model:
            current_id = chain.id
            if current_id == first_id:
                # The first chain itself is left untouched.
                continue
            if current_id == second_id:
                for residue in chain:
                    owner = residue.get_parent()
                    owner.id = first_id
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(stem + '_assemble.pdb')
def test_pdbio_write_custom_residue(self):
    """Write a chainless residue using PDBIO"""
    # A residue with one atom and no parent chain.
    res = Residue.Residue((' ', 1, ' '), 'DUM', '')
    res.add(Atom.Atom('CA', [0.1, 0.1, 0.1], 1.0, 1.0, ' ', 'CA', 1, 'C'))

    writer = PDBIO()
    writer.set_structure(res)
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        writer.save(path)
        reparsed = self.parser.get_structure("res", path)
        atoms = list(reparsed.get_atoms())
        self.assertEqual(len(atoms), 1)
        only_atom = atoms[0]
        self.assertEqual(only_atom.name, 'CA')
        self.assertEqual(only_atom.parent.resname, 'DUM')
        self.assertEqual(only_atom.parent.parent.id, 'A')
    finally:
        os.remove(path)
def get_sequence(pdb, chain, first, last, output):
    """Write the ``first``-``last`` domain of one chain of ``pdb`` to ``output``.

    The chain is looked up in the first model and its peptide sequence is
    assembled, but only the structure filtered by ``SelectDomain`` is written.
    """
    # PERMISSIVE=0 is what this code passes to the parser.
    structure = PDBParser(PERMISSIVE=0).get_structure(pdb, pdb)
    target_chain = structure[0][chain]

    # The sequence is built as before although nothing reads it afterwards.
    Sequence = ""
    for peptide in PPBuilder().build_peptides(target_chain):
        Sequence = Sequence + peptide.get_sequence()

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output, SelectDomain(chain, first, last))
def create_structure(coords, pdb_type, remove_masked):
    """Build a one-chain structure from coordinates and save it as PDB.

    Args:
      coords: 3D coordinates of structure; rows ``3*i`` to ``3*i + 2`` are
        used for the N, CA and CB atoms of residue ``i``
      pdb_type: label appended to the output file name
      remove_masked: when False, residues whose mask is not 1 are kept too

    Returns:
      structure

    The sequence, mask and name come from the module-level ``protein`` and
    the file goes to ``save_dir + <name>_<pdb_type>.pdb``.
    """
    atom_names = ['N', 'CA', 'CB']
    name = protein.id_
    structure = Structure(name)
    model = Model(0)
    chain = Chain('A')
    for index, letter in enumerate(protein.primary):
        resname = AA_LETTERS[letter]
        if not (int(protein.mask[index]) == 1 or remove_masked == False):
            continue
        new_residue = Residue((' ', index + 1, ' '), resname, ' ')
        offset = 3 * index
        for k, atom_name in enumerate(atom_names):
            new_residue.add(
                Atom(name=atom_name,
                     coord=coords[offset + k, :],
                     bfactor=0,
                     occupancy=1,
                     altloc=' ',
                     fullname=" {} ".format(atom_name),
                     serial_number=0))
        chain.add(new_residue)
    model.add(chain)
    structure.add(model)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(save_dir + name + '_' + pdb_type + '.pdb')
    return structure
def test_conversion(self):
    """Parse 1LCD.cif, write 1LCD.pdb, parse again and compare."""
    import os  # local import: needed to close and remove the temp file

    cif_parser = MMCIFParser(QUIET=1)
    cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

    pdb_writer = PDBIO()
    pdb_writer.set_structure(cif_struct)
    filenumber, filename = tempfile.mkstemp()
    # mkstemp returns an open descriptor; close it and always delete the file.
    os.close(filenumber)
    try:
        pdb_writer.save(filename)
        pdb_parser = PDBParser(QUIET=1)
        pdb_struct = pdb_parser.get_structure("example_pdb", filename)
    finally:
        os.remove(filename)

    # comparisons (the parsed structure lives in memory, the file is gone)
    self.assertEqual(len(pdb_struct), len(cif_struct))

    pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
    cif_atom_names = [a.name for a in cif_struct.get_atoms()]
    self.assertEqual(pdb_atom_names, cif_atom_names)

    pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
    cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
    self.assertEqual(pdb_atom_elems, cif_atom_elems)
def test_model_numbering(self):
    """Preserve model serial numbers during I/O."""
    tmp_path = "PDB/tmp.pdb"

    def confirm_numbering(struct):
        """Check that the 20 models are numbered 1..20 in order."""
        self.assertEqual(len(struct), 20)
        for idx, model in enumerate(struct):
            # assertEqual, not assertTrue: assertTrue(a, b) treats b as the
            # failure message and passes for any truthy a.
            self.assertEqual(model.serial_num, idx + 1)
            self.assertEqual(model.serial_num, model.id + 1)

    parser = PDBParser()
    struct1 = parser.get_structure("1mot", "PDB/1MOT.pdb")
    confirm_numbering(struct1)

    # Round trip: serialize and parse again
    io = PDBIO()
    io.set_structure(struct1)
    try:
        io.save(tmp_path)
        struct2 = parser.get_structure("1mot", tmp_path)
        confirm_numbering(struct2)
    finally:
        if os.path.isfile(tmp_path):
            os.remove(tmp_path)
def aa_pdb(pocket):
    """Extract the residues listed in a pocket file from ``<name>_out.pdb``.

    The sixth whitespace-separated field of every ``ATOM`` line of ``pocket``
    is collected; residues of the protein whose number (as text) is among
    those values are written to a file named after ``name`` and ``pocket``.

    Changes the working directory to ``path / protein`` for reading and to
    ``out`` for writing; ``path``, ``protein``, ``name`` and ``out`` are
    module-level names.
    """
    with open(pocket) as pk:
        aas = list({line.split()[5] for line in pk if line.startswith('ATOM')})

    os.chdir(path / protein)
    protein_name = name + '_out.pdb'
    structure = PDBParser().get_structure(name, protein_name)

    class Pocket(Select):
        def accept_residue(self, residue):
            return 1 if str(residue.get_id()[1]) in aas else 0

    writer = PDBIO()
    writer.set_structure(structure)
    pocket_name = name + pocket.replace('pocket','_').replace('_atm', '')
    os.chdir(out)
    writer.save(pocket_name, Pocket())
def renumberChain():
    """Renumber one chain from a chosen start number and save ``<name>_ren.pdb``.

    Prompts for the chain id and the first residue number.  Standard residues
    of that chain are numbered consecutively (the counter carries on across
    models); hetero residues of the chain are removed.  Uses ``raw_input``
    (Python 2) and the module-level ``pdb_name``.

    Raises:
        ValueError: if the typed start number is not an integer.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)
    what_chain = raw_input('What is the chain you want to renumber : ')
    # int(raw_input()) rather than input(): Python 2's input() evaluates
    # whatever the user types as an expression.
    number = int(raw_input('What is the first number of the chain : '))
    for model in structure:
        for chain in model:
            if chain.id != what_chain:
                continue
            # Snapshot: detach_child mutates the chain, and walking the live
            # chain would skip the residue after each removed one.
            for residue in list(chain):
                if residue.id[0] == ' ':
                    residue.id = (' ', number, ' ')
                    number = number + 1
                else:
                    chain.detach_child(residue.id)
    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct + '_ren.pdb')
def extract_ligand(self, save_path):
    """Write the configured ligand residue to ``<save_path>/<chain>_<residue>.pdb``.

    The model, chain and ligand are resolved from the class-level settings of
    ``LigandExtractor`` (``structure``, ``model_num``, ``chain_name``,
    ``residue_name``) and stored back on the class.
    """
    # Get the model object.
    LigandExtractor.model = LigandExtractor.structure[LigandExtractor.model_num]
    # Get the chain object.
    LigandExtractor.chain = LigandExtractor.model[LigandExtractor.chain_name]
    # Get the ligand object; the last residue with a matching name wins.
    for candidate in self.chain.child_list:
        if LigandExtractor.residue_name == candidate.resname:
            LigandExtractor.ligand = candidate

    class LigandSelect(Select):
        def accept_model(self, model):
            return 1 if model == LigandExtractor.model else 0

        def accept_chain(self, chain):
            return 1 if chain == LigandExtractor.chain else 0

        def accept_residue(self, residue):
            return 1 if residue == LigandExtractor.ligand else 0

    writer = PDBIO()
    writer.set_structure(self.structure)
    target = save_path + os.sep + "%s_%s.pdb" % (LigandExtractor.chain_name,
                                                 LigandExtractor.residue_name)
    writer.save(target, LigandSelect())
def constrain(options):
    """Write only the chains listed in ``options.chains`` to ``options.output``.

    The input structure is parsed from ``options.file``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.
    NOTE(review): ``structure`` is bound only when ``options.file`` is truthy;
    with this layout the final save would hit an unbound name otherwise -
    confirm.
    """
    p = PDBParser()
    chains_to_choose = []
    if (options.file):
        pdb_file = options.file
        structure = p.get_structure("input", pdb_file)
        print "Input file = ", pdb_file
    # Copy the requested chain ids into a list used by the selector below.
    for chain in options.chains:
        chains_to_choose.append(chain)
    Select = Bio.PDB.Select
    class ConstrSelect(Select):
        # Selector that keeps a chain only when its id was requested.
        def accept_chain(self, chain):
            #print dir(residue)
            if chain.id in chains_to_choose:
                return 1
            else:
                return 0
    w = PDBIO()
    w.set_structure(structure)
    w.save(options.output, ConstrSelect())
def get_sequence(pdb, chain):
    """Renumber one chain of ``pdb`` so it starts at 1 and save it.

    The chain is taken from the first model.  When its first residue is not
    numbered 1, every residue id becomes ``(' ', old - start + 1, ' ')``.
    The chain, filtered by ``SelectChains``, is written to
    ``"renumbered_" + pdb``.

    Args:
        pdb: path of the input PDB file (also used as the structure id).
        chain: id of the chain to renumber and write.

    Raises:
        KeyError: if the chain is not in the first model.
        IndexError: if the chain has no residues.
    """
    # PERMISSIVE=0 is what this code passes to the parser.
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)
    pdb_chain = pdb_structure[0][chain]

    residues = list(pdb_chain)
    start = residues[0].id[1]
    # Value comparison: "is not 1" tests object identity, which only works by
    # accident of small-int caching and triggers a SyntaxWarning on Python 3.8+.
    if start != 1:
        for residue in residues:
            residue.id = (' ', residue.id[1] - start + 1, ' ')

    io = PDBIO()
    io.set_structure(pdb_structure)
    output = "renumbered_" + pdb
    io.save(output, SelectChains(chain))
def CheckInputStructures(pdb):
    """Split a PDB file into one file per sufficiently long chain.

    Chains with fewer than 220 residues (hetero residues and waters are not
    counted) are reported and skipped.  Every other chain is written next to
    the input file and its path is collected.

    Parameters
    ----------
    pdb : str
        Path of the input PDB file, using ``/`` as separator.

    Returns
    -------
    list of str
        Paths of the per-chain PDB files that were written.
    """
    pdb_name = pdb.split('/')[-1]
    pdb_id = pdb_name.split('.')[0]
    # Everything before the file name.  A bare file name yields '', which
    # would turn '{0}/...' into a path at the filesystem root, so fall back
    # to the current directory in that case.
    hom_dir = pdb.split('{0}'.format(pdb_name))[0] or '.'

    m = p.get_structure(pdb_id, pdb)
    # get_chains() is a generator: materialise it once so it can be both
    # counted and iterated (len() on the generator raised TypeError).
    chains = list(m.get_chains())
    print('\n #1# Superpose Info: Input PDB {0} has {1:2d} chain(s)'.format(
        pdb_name, len(chains)))

    Targets = []
    # get individual chains in PDB and check
    for chain in chains:
        chain_id = chain.get_id()
        n_residues = sum(
            1 for r in chain.get_residues()
            if not re.search(r'H_|W', r.get_id()[0]))
        if n_residues < 220:
            print(
                '\n \033[31m#2# Superpose Warning:\033[0m {0}_{1} has < 220 residues, unlikely a kinase. Skip this chain.'
                .format(pdb_id, chain_id))
            continue

        # Do not append the chain id again when the id's last '_' field
        # already contains it.
        already_tagged = bool(
            re.search(r'_', pdb_id)
            and re.search('{}'.format(chain_id), pdb_id.split('_')[-1]))
        if already_tagged:
            new_pdb = '{0}/{1}.pdb'.format(hom_dir, pdb_id)
        else:
            new_pdb = '{0}/{1}_{2}.pdb'.format(hom_dir, pdb_id, chain_id)

        w = PDBIO()
        w.set_structure(chain)
        w.save(new_pdb)
        Targets.append(new_pdb)
    return Targets
def create_chain_pdb(FASTA_FILE, PDB_PROTEIN_PATH, PDB_CHAIN_PATH):
    """Create a new pdb file of one chain for every record of a FASTA file.

    Each record id is read as ``<protein id><chain character>``.  The
    matching mmCIF file is parsed and the selected chain is written to
    ``<PDB_CHAIN_PATH><record id>.ent``.  Records whose output file already
    exists are skipped; records that fail are reported and skipped.

    Parameters
    ----------
    FASTA_FILE : str
        The file location of the pisces file
    PDB_PROTEIN_PATH : str
        The directory location of full pdb
    PDB_CHAIN_PATH : str
        The directory location of chain pdb
    """
    # The parser holds no per-file state that is used here, so one instance
    # serves all records.
    cif_parser = MMCIFParser(QUIET=1)

    # get fasta records
    records = list(SeqIO.parse(FASTA_FILE, "fasta"))
    for record in records:
        chain_char = record.id[-1]
        protein_id = record.id[:-1]
        chain_id = record.id
        # NOTE(review): .lower() is applied to the whole path, directory
        # included -- confirm that only the id was meant to be lower-cased.
        pdb_protein_file = (PDB_PROTEIN_PATH + protein_id + '.cif').lower()
        pdb_chain_file = (PDB_CHAIN_PATH + record.id + '.ent')

        if os.path.isfile(pdb_chain_file):
            print("File exist", pdb_chain_file)
            continue

        try:
            # Filter for one chain and create a new pdb file
            structure = cif_parser.get_structure(protein_id, pdb_protein_file)
            io = PDBIO()
            io.set_structure(structure)
            io.save(pdb_chain_file, ChainSelect(chain_char))
            print('Saved', chain_id)
        except Exception:
            # Best effort per record, but no longer swallow
            # KeyboardInterrupt/SystemExit as the bare ``except:`` did.
            print('Error', pdb_protein_file)
def get_simrna_ready(self, renumber_residues=True):
    """Get simrna_ready ..

    - take only first model,
    - renumber residues if renumber_residues=True

    The structure is written to ``/tmp/out.pdb``, re-read with Biopython and
    rebuilt keeping only the atoms listed in ``G_ATOMS``/``A_ATOMS``/
    ``U_ATOMS``/``C_ATOMS``.  For the first residue of each chain, when no
    ``P`` atom is present, the ``data/PO3_inner.pdb`` template is
    superimposed on O4'/C4'/C3' and its P, OP1, OP2 and O5' atoms are added.
    The result is saved to ``/tmp/outout.pdb`` and loaded back into
    ``self.lines``.  Atoms that could not be found are printed as
    ``REMARK 000`` lines.

    :param renumber_residues: number residues from 1 within every chain.

    .. warning:: requires: Biopython"""
    try:
        from Bio import PDB
        from Bio.PDB import PDBIO
    except:
        sys.exit(
            'Error: Install biopython to use this function (pip biopython)'
        )
    import warnings
    warnings.filterwarnings(
        'ignore',
        '.*Invalid or missing.*',
    )
    warnings.filterwarnings(
        'ignore',
        '.*with given element *',
    )
    import copy
    # Atom names kept for each nucleotide, in output order.
    G_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split(
    )
    A_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split(
    )
    U_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split(
    )
    C_ATOMS = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split(
    )
    # NOTE(review): fixed, shared temp path -- concurrent runs would
    # overwrite each other's file.
    ftmp = '/tmp/out.pdb'
    self.write(ftmp, v=False)
    parser = PDB.PDBParser()
    struct = parser.get_structure('', ftmp)
    model = struct[0]  # only the first model is processed
    s2 = PDB.Structure.Structure(struct.id)
    m2 = PDB.Model.Model(model.id)
    chains2 = []
    missing = []  # [atom name, chain id, residue, new residue number]
    for chain in model.get_list():
        res = []
        for r in chain:
            res.append(r)
        res = copy.copy(res)
        c2 = PDB.Chain.Chain(chain.id)
        c = 1  # new chain, goes from 1 if renumber True
        for r in res:
            # hack for amber/qrna: map 5'/3'-terminal and R-prefixed
            # residue names onto the plain one-letter names
            r.resname = r.resname.strip()
            if r.resname == 'RC3': r.resname = 'C'
            if r.resname == 'RU3': r.resname = 'U'
            if r.resname == 'RG3': r.resname = 'G'
            if r.resname == 'RA3': r.resname = 'A'
            if r.resname == 'C3': r.resname = 'C'
            if r.resname == 'U3': r.resname = 'U'
            if r.resname == 'G3': r.resname = 'G'
            if r.resname == 'A3': r.resname = 'A'
            if r.resname == 'RC5': r.resname = 'C'
            if r.resname == 'RU5': r.resname = 'U'
            if r.resname == 'RG5': r.resname = 'G'
            if r.resname == 'RA5': r.resname = 'A'
            if r.resname == 'C5': r.resname = 'C'
            if r.resname == 'U5': r.resname = 'U'
            if r.resname == 'G5': r.resname = 'G'
            if r.resname == 'A5': r.resname = 'A'
            if r.resname.strip() == 'RC': r.resname = 'C'
            if r.resname.strip() == 'RU': r.resname = 'U'
            if r.resname.strip() == 'RG': r.resname = 'G'
            if r.resname.strip() == 'RA': r.resname = 'A'
            r2 = PDB.Residue.Residue(r.id, r.resname.strip(), r.segid)
            if renumber_residues:
                r2.id = (r2.id[0], c, r2.id[2])  ## renumber residues
            if c == 1:
                # First residue of the chain: check for a 5' phosphate.
                p_missing = True
                # (A disabled alternative used to rename O5' to P here; it
                # bound ``x = r["O5'"]``.)
                for a in r:
                    if a.id == 'P':
                        p_missing = False
                if p_missing:
                    # Locate the data directory next to this source file,
                    # following the link if the file is a symlink.
                    currfn = __file__
                    if currfn == '':
                        path = '.'
                    else:
                        path = os.path.dirname(currfn)
                    if os.path.islink(
                            currfn
                    ):  #path + os.sep + os.path.basename(__file__)):
                        path = os.path.dirname(
                            os.readlink(path + os.sep +
                                        os.path.basename(currfn)))
                    po3_struc = PDB.PDBParser().get_structure(
                        '', path + '/data/PO3_inner.pdb')
                    po3 = [
                        po3_atom for po3_atom in po3_struc[0].get_residues()
                    ][0]
                    # Fit the template onto this residue's sugar atoms.
                    r_atoms = [r["O4'"], r["C4'"], r["C3'"]]
                    po3_atoms = [po3["O4'"], po3["C4'"], po3["C3'"]]
                    sup = PDB.Superimposer()
                    sup.set_atoms(r_atoms, po3_atoms)
                    rms = round(sup.rms, 3)  # computed but not used below
                    sup.apply(po3_struc.get_atoms())  # to all atoms of po3
                    r.add(po3['P'])
                    r.add(po3['OP1'])
                    r.add(po3['OP2'])
                    try:
                        r.add(po3["O5'"])
                    except:
                        # adding failed: drop the existing O5' and use the
                        # template's one instead
                        del r["O5'"]
                        r.add(po3["O5'"])
                    p_missing = False  # off this function
                    # (disabled debug code used to save po3_struc to
                    # "po3.pdb" here)
            # NOTE(review): ``ignore_op3`` and ``x`` are not defined anywhere
            # in this function; ``ignore_op3`` must exist as a global or the
            # first residue raises NameError.  ``x`` is only reached when
            # ``p_missing`` is still true, which the code above prevents.
            if str(r.get_resname()).strip() == "G":
                for an in G_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    except KeyError:
                        #print 'Missing:', an, r, ' new resi', c
                        missing.append([an, chain.id, r, c])
                c2.add(r2)
            elif str(r.get_resname()).strip() == "A":
                for an in A_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    except KeyError:
                        #print 'Missing:', an, r, ' new resi', c
                        missing.append([an, chain.id, r, c])
                c2.add(r2)
            elif str(r.get_resname()).strip() == "C":
                for an in C_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    except:
                        # NOTE(review): bare except here, unlike the
                        # KeyError handlers of the G/A/U branches.
                        #print 'Missing:', an, r, ' new resi', c
                        missing.append([an, chain.id, r, c])
                c2.add(r2)
            elif str(r.get_resname()).strip() == "U":
                for an in U_ATOMS:
                    if c == 1 and ignore_op3:
                        if an in ['P', 'OP1', 'OP2']:
                            continue
                    try:
                        if c == 1 and an == "O5'" and p_missing:
                            r2.add(x)
                        else:
                            r2.add(r[an])
                    except KeyError:
                        #print 'Missing:', an, r,' new resi', c
                        missing.append([an, chain.id, r, c])
                c2.add(r2)
            # Residues with any other name are not copied, but they still
            # consume a residue number.
            c += 1
        chains2.append(c2)
    io = PDBIO()
    s2.add(m2)
    for chain2 in chains2:
        m2.add(chain2)
    #print c2
    #print m2
    io.set_structure(s2)
    #fout = fn.replace('.pdb', '_fx.pdb')
    fout = '/tmp/outout.pdb'  # hack
    io.save(fout)
    if missing:
        print 'REMARK 000 Missing atoms:'
        for i in missing:
            print 'REMARK 000 +', i[0], i[1], i[2], 'residue #', i[3]
        #raise Exception('Missing atoms in %s' % self.fn)
    # Reload the cleaned file so this object now holds the new lines.
    s = StrucFile(fout)
    self.lines = s.lines
def get_rnapuzzle_ready(self, renumber_residues=True):
    """Get rnapuzzle ready structure.

    Submission format @http://ahsoka.u-strasbg.fr/rnapuzzles/

    Does:

    - keep only given atoms,
    - renumber residues from 1, if renumber_residues=True (by default)

    The structure is written to ``/tmp/out.pdb``, re-read with Biopython
    (first model only), rebuilt residue by residue and saved to
    ``/tmp/outout.pdb``.  That file is loaded back into ``self.lines`` and
    every ``TER`` record is completed with serial number, residue name,
    chain id and residue number; the serial numbers of the following
    records are shifted accordingly.  Atoms that could not be found are
    reported as ``REMARK 000`` lines on stdout.

    :param renumber_residues: number residues from 1 within every chain.
    """
    try:
        from Bio import PDB
        from Bio.PDB import PDBIO
    except ImportError:
        sys.exit(
            'Error: Install biopython to use this function (pip biopython)'
        )

    # Atoms kept for every nucleotide, in output order.
    backbone = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1'".split()
    atoms_by_base = {
        'G': backbone + "N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split(),
        'A': backbone + "N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split(),
        'U': backbone + "N1 C2 O2 N3 C4 O4 C5 C6".split(),
        'C': backbone + "N1 C2 O2 N3 C4 N4 C5 C6".split(),
    }

    # hack for amber/qrna: RC3, C3, RC5, C5 and RC all mean C (same for
    # U, G and A).
    resname_aliases = {}
    for base in 'CUGA':
        for alias in ('R' + base + '3', base + '3', 'R' + base + '5',
                      base + '5', 'R' + base):
            resname_aliases[alias] = base

    # NOTE(review): fixed, shared temp paths -- concurrent runs would
    # overwrite each other's files.
    ftmp = '/tmp/out.pdb'
    self.write(ftmp, v=False)

    struct = PDB.PDBParser().get_structure('', ftmp)
    model = struct[0]

    s2 = PDB.Structure.Structure(struct.id)
    m2 = PDB.Model.Model(model.id)

    chains2 = []
    missing = []  # [atom name, chain id, residue, new residue number]
    for chain in model.get_list():
        c2 = PDB.Chain.Chain(chain.id)
        c = 1  # new chain, goes from 1 !!!
        for r in list(chain):
            r.resname = r.resname.strip()
            r.resname = resname_aliases.get(r.resname, r.resname)

            r2 = PDB.Residue.Residue(r.id, r.resname.strip(), r.segid)
            if renumber_residues:
                r2.id = (r2.id[0], c, r2.id[2])  # renumber residues

            wanted = atoms_by_base.get(str(r.get_resname()).strip())
            if wanted is not None:
                for an in wanted:
                    try:
                        r2.add(r[an])
                    except KeyError:
                        # The "C" branch used a bare ``except:`` here; all
                        # bases now only treat a missing atom as "missing".
                        missing.append([an, chain.id, r, c])
                c2.add(r2)
            # Residues with any other name are dropped but still consume a
            # number, exactly as before.
            c += 1
        chains2.append(c2)

    s2.add(m2)
    for chain2 in chains2:
        m2.add(chain2)

    io = PDBIO()
    io.set_structure(s2)
    fout = '/tmp/outout.pdb'  # hack
    io.save(fout)

    if missing:
        # Single-argument print calls give the same output on Python 2 and
        # Python 3 as the former print statements.
        print('REMARK 000 Missing atoms:')
        for i in missing:
            print('REMARK 000 + %s %s %s residue # %s' %
                  (i[0], i[1], i[2], i[3]))

    #
    # fix ter 'TER' -> TER    1528        G A  71
    #
    s = StrucFile(fout)
    self.lines = s.lines

    nlines = []
    no_ters = 0  # TER records seen so far; each one shifts later serials
    for c, l in enumerate(self.lines):
        if l.startswith('TER'):
            # Copy the identifiers from the record just before the TER.
            atom_l = self.lines[c - 1]
            new_l = 'TER'.ljust(80)
            new_l = self.set_atom_index(
                new_l, str(self.get_atom_index(atom_l) + 1 + no_ters))
            new_l = self.set_res_code(new_l, self.get_res_code(atom_l))
            new_l = self.set_chain_id(new_l, self.get_chain_id(atom_l))
            new_l = self.set_res_index(new_l, self.get_res_index(atom_l))
            nlines.append(new_l)
            no_ters += 1
        else:
            atom_index = self.get_atom_index(l)
            if atom_index:
                # 1 ter +1, 2 ters +2 etc
                l = self.set_atom_index(l, atom_index + no_ters)
            nlines.append(l)
    self.lines = nlines
def get_interaction_pairs(pdb_filename):
    """
    Take a pdb file path and generate a folder with pdb files holding the
    unique pairwise interactions found in that pdb.

    The folder is named after the structure id; any regular file already in
    it is deleted first.

    :param pdb_filename: path of the pdb file to analyse
    :return: the structure id, which is also the name of the output folder
    """
    # Load the pdb file into a structure object.
    parser = PDBParser(PERMISSIVE=1)
    structure_id = get_structure_name(pdb_filename)
    structure = parser.get_structure(structure_id, pdb_filename)

    neighbor_chains = get_neighbor_chains(structure)
    seq_dict = get_seq_dict(structure.get_chains())
    similar_sequences = get_similar_sequences(list(structure.get_chains()),
                                              seq_dict)

    # Group every neighbouring chain pair under the (sorted) pair of chain
    # types it belongs to; a chain type is the id of the representative of
    # all chains with more than 95% of similarity.
    interaction_dict = {}
    for chain1 in neighbor_chains:
        for chain2 in neighbor_chains[chain1]:
            type_ids = [
                similar_sequences[chain1].get_id(),
                similar_sequences[chain2].get_id(),
            ]
            nr_interaction = tuple(sorted(type_ids))
            interaction_dict.setdefault(nr_interaction, []).append(
                [chain1, chain2])

    clean_interaction_dict(interaction_dict, similar_sequences)

    if options.verbose:
        counter = 0
        print('\n')
        for pair in interaction_dict:
            print(pair)
            for chain_pair in interaction_dict[pair]:
                print("\t%s" % chain_pair)
                counter += 1
        print(counter)

    # Start from an empty output folder.
    if os.path.exists(structure_id):
        for the_file in os.listdir(structure_id):
            file_path = os.path.join(structure_id, the_file)
            if os.path.isfile(file_path):
                os.unlink(file_path)
    else:
        os.makedirs(structure_id)

    io = PDBIO()
    io.set_structure(structure)
    for interactions in interaction_dict.values():
        for first, second in interactions:
            out_name = '%s/%s_%s%s.pdb' % (structure_id, structure_id,
                                           first.get_id(), second.get_id())
            io.save(out_name, ChainSelect(first, second))
    return structure_id
def save_loop(globalfile, structure_id, loopfile, id1, id2):
    """Save the residues selected by ``ResidueSelect(id1, id2)`` to *loopfile*.

    The structure is built from *globalfile* with
    ``make_structure_for_pdbfile`` under the id *structure_id*.
    """
    structure = make_structure_for_pdbfile(globalfile, structure_id)
    # The PDBParser that used to be created here was never used.
    io = PDBIO()
    io.set_structure(structure)
    io.save(loopfile, ResidueSelect(id1, id2))
if p.returncode: print('[!] freesasa did not run successfully', file=sys.stderr) raise Exception(stderr) # Rewind & Parse results file # Save _outf.seek(0) rsa = parse_freesasa_output(_outf) _pdbf.close() _outf.close() return rsa if __name__ == '__main__': P = PDBParser(QUIET=1) io = PDBIO() # Parse structure pdb_path = sys.argv[1] structure, n_chains, n_res = parse_structure(pdb_path) print('[+] Parsed PDB file {0} ({1} chains, {2} residues)'.format( structure.id, n_chains, n_res)) cmplx_sasa = execute_freesasa(structure) print('# Residue\tbbRSA\tscRSA') for res in sorted(cmplx_sasa, key=lambda x: x[2]): print('{0[1]} {0[2]:<4d}\t{1[1]:>6.2f}\t{1[2]:>6.2f}'.format( res, cmplx_sasa[res]))
def randomize_starting_position(ligand_file, complex_file, outputfolder=".", nposes=200, test=False, user_center=None,
                                logger=None):
    """
    Randomize initial ligand position around the receptor.
    Default number of poses = 200.

    Random positions are drawn inside a sphere centred on the receptor
    centre of mass (or on the atom given by ``user_center``).  A pose is
    kept when the receptor has atoms within the "far" cutoff of the ligand
    centre but none within the "near" cutoff.  If no pose is accepted for
    60 seconds the sphere radius grows by 1 A; after 20 A of growth the
    search is aborted.

    :param ligand_file: PDB file with the ligand
    :param complex_file: PDB file with the receptor
    :param outputfolder: folder receiving the ``ligand<N>.pdb`` files
    :param nposes: number of poses to generate
    :param test: seed numpy's random generator with 42
    :param user_center: optional ``chain:resnumber:atomname`` sphere centre
    :param logger: object with an ``info`` method; a module logger from
        ``logging`` is used when omitted
    :return: (list of written files, final sphere radius, sphere centre)
    """
    if logger is None:
        # The default used to be dereferenced directly, which raised
        # AttributeError on the first logger.info() call.
        import logging
        logger = logging.getLogger(__name__)

    if test:
        np.random.seed(42)

    # read in files
    parser = PDBParser()
    output = []
    structure = parser.get_structure('protein', complex_file)
    ligand = parser.get_structure('ligand', ligand_file)
    COI = np.zeros(3)

    # get center of interface (if PPI)
    if user_center:
        try:
            chain_id, res_number, atom_name = user_center.split(":")
        except ValueError:
            # Built from two literals so the message no longer embeds the
            # source indentation of a backslash-continued string.
            raise cs.WrongAtomStringFormat(
                f"The specified atom is wrong '{user_center}'. "
                "Should be 'chain:resnumber:atomname'")
        # NOTE(review): if no atom matches, COI silently stays at the origin.
        for chain in structure.get_chains():
            if chain.id == chain_id:
                for residue in chain.get_residues():
                    if residue.id[1] == int(res_number):
                        for atom in residue.get_atoms():
                            if atom.name == atom_name:
                                COI = np.array(list(atom.get_vector()))

    # calculate protein and ligand COM
    com_protein = calculate_com(structure)
    com_ligand = calculate_com(ligand)

    # calculating the maximum d of the ligand
    coor_ligand = np.array(
        [list(atom.get_vector() - com_ligand) for atom in ligand.get_atoms()])
    coor_ligand_max = np.amax(coor_ligand, axis=0)
    d_ligand = np.sqrt(np.sum(coor_ligand_max ** 2))

    # set threshold for near and far contacts based on ligand d
    if d_ligand / 2 < 5.0:
        d5_ligand = 5.0
    else:
        d5_ligand = d_ligand / 2 + 1

    if d_ligand > 8.0:
        d8_ligand = d_ligand / 2 + 4
    else:
        d8_ligand = 8.0

    # calculate vector to move the ligand
    if user_center:
        move_vector = com_ligand - COI
    else:
        move_vector = com_ligand - com_protein

    # translate the ligand to the protein COM (COI for PPI)
    original_coords = []
    for atom in ligand.get_atoms():
        ligand_origin = np.array(list(atom.get_vector())) - move_vector
        original_coords.append(ligand_origin)
        atom.set_coord(ligand_origin)

    # calculating the maximum radius of the protein from the origin
    coor = np.array(
        [list(atom.get_vector() - com_protein) for atom in structure.get_atoms()])
    coor_max = np.amax(coor, axis=0)
    d = np.sqrt(np.sum(coor_max ** 2))

    # radius of the sphere from the origin
    D = 10.0 if user_center else np.ceil(6.0 + d)
    D_initial = D
    logger.info("Sampling {}A spherical box around the centre of the receptor/interface.".format(D))

    if user_center:
        sphere_cent = COI
    else:
        sphere_cent = com_protein

    # The receptor never moves, so its neighbour search is built once here
    # instead of twice per sampled pose.
    protein_search = NeighborSearch(Selection.unfold_entities(structure, "A"))

    j = 0
    logger.info("Generating {} poses...".format(nposes))
    start_time = time.time()
    while j < nposes:
        # generate random coordinates (cube root keeps the density uniform
        # over the sphere volume)
        phi = np.random.uniform(0, 2 * np.pi)
        costheta = np.random.uniform(-1, 1)
        u = np.random.uniform(0, 1)
        theta = np.arccos(costheta)

        r = D * np.cbrt(u)
        x = r * np.sin(theta) * np.cos(phi)
        y = r * np.sin(theta) * np.sin(phi)
        z = r * np.cos(theta)

        # move ligand to the starting point (protein COM)
        for atom, coord in zip(ligand.get_atoms(), original_coords):
            atom.set_coord(coord)

        # translate ligand to a random position
        translation = (x, y, z)
        for atom in ligand.get_atoms():
            new_pos_lig_trans = np.array(list(atom.get_vector())) - translation
            atom.set_coord(new_pos_lig_trans)

        # calculate ligand COM in the new position
        new_ligand_COM = calculate_com(ligand)

        # rotate ligand about the axis through the origin and its COM
        # NOTE(review): randint yields whole radians only; uniform() would
        # cover every angle but would change the seeded test output.
        vector = Vector(new_ligand_COM)
        rotation_matrix = rotaxis(np.random.randint(0, 2 * np.pi), vector)

        for atom in ligand.get_atoms():
            coords_after = atom.get_vector().left_multiply(rotation_matrix)
            atom.set_coord(coords_after)

        # check if it's inside the sampling sphere
        dist = np.sqrt((new_ligand_COM[0] - sphere_cent[0]) ** 2 +
                       (new_ligand_COM[1] - sphere_cent[1]) ** 2 +
                       (new_ligand_COM[2] - sphere_cent[2]) ** 2)

        if dist < D:
            # check contacts at: 5A (no contacts) and 8A (needs contacts)
            contacts5 = protein_search.search(new_ligand_COM, d5_ligand, "S")
            contacts8 = protein_search.search(new_ligand_COM, d8_ligand, "S")

            if contacts8 and not contacts5:
                j += 1
                io = PDBIO()
                io.set_structure(ligand)
                output_name = os.path.join(outputfolder, 'ligand{}.pdb'.format(j))
                io.save(output_name)
                output.append(output_name)
                start_time = time.time()

            # widen the sphere when nothing was accepted for a minute
            end_time = time.time()
            total_time = end_time - start_time
            if total_time > 60:
                D += 1
                if D - D_initial >= 20:
                    logger.info("Original box increased by 20A. Aborting...")
                    break
                start_time = end_time
                logger.info("Increasing sampling box by 1A.")

    logger.info("{} poses created successfully.".format(j))
    return output, D, list(sphere_cent)
default=None, type="string", help="pdb structure file for additional 3-coord cartesian per residue") (options, args) = parser.parse_args() parser = PDBParser() structure = parser.get_structure("mystruct", options.pdbfile) model = structure[0] average_bfactors = {} for residue in model["C"]: average_bfactors[residue.get_id()[1]] = 0.0 for chain in model.get_list(): for residue in chain.get_list(): if residue.has_id("CA"): ca = residue["CA"] average_bfactors[residue.get_id()[1]] += float( ca.get_bfactor()) / float(len(model.get_list())) for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): atom.set_bfactor(average_bfactors[residue.get_id()[1]]) w = PDBIO() w.set_structure(structure) w.save('sym_' + options.pdbfile)
def save_chains(pdb, chains, target):
    """Write the chains picked by ``Chain_select(chains)`` from *pdb* to *target*.

    The structure id is the first four characters of *pdb*; atoms are
    renumbered on output (``preserve_atom_numbering=False``).
    """
    structure = set_parser(pdb).get_structure(pdb[:4], pdb)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(target, Chain_select(chains), preserve_atom_numbering=False)
# Residues 57, 102 and 195 of chain A are the anchors of the superposition.
res_195_struct_1 = struct_1[0]['A'][195]

res_57_struct_2 = struct_2[0]['A'][57]
res_102_struct_2 = struct_2[0]['A'][102]
res_195_struct_2 = struct_2[0]['A'][195]

# Build the two atom lists (target and probe) from which the
# rotation-translation matrix is computed.
backbone_names = ['CA', 'N']

target = []
for name in backbone_names:
    target.extend([
        res_57_struct_1[name],
        res_102_struct_1[name],
        res_195_struct_1[name],
    ])

probe = []
for name in backbone_names:
    probe.extend([
        res_57_struct_2[name],
        res_102_struct_2[name],
        res_195_struct_2[name],
    ])

# Both lists must have the same length for the matrix to be computed.
assert len(target) == len(probe)

# Compute the rotation-translation matrix.
sup = Superimposer()
sup.set_atoms(target, probe)

# Apply it; the matrix can only be applied to lists of atoms.
struct_2_atoms = list(struct_2.get_atoms())
sup.apply(struct_2_atoms)

# Write the rotated and translated structure.
out = PDBIO()
out.set_structure(struct_2)
out.save('1FXY-superimposed.pdb')
def print_structure(self, outfile=''):
    """Save ``self.structure`` to *outfile* using ``PDBIO``."""
    writer = PDBIO()
    writer.set_structure(self.structure)
    writer.save(outfile)
class WriteTest(unittest.TestCase):
    """Round-trip tests: write entities with PDBIO and parse them back."""

    @classmethod
    def setUpClass(cls):
        cls.io = PDBIO()
        cls.parser = PDBParser(PERMISSIVE=1)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            cls.structure = cls.parser.get_structure("example", "PDB/1A8O.pdb")

    def _temp_filename(self):
        """Return the path of a new temp file that is removed after the test."""
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        # Runs whether the test passes, fails or errors.
        self.addCleanup(os.remove, filename)
        return filename

    def test_pdbio_write_structure(self):
        """Write a full structure using PDBIO."""
        struct1 = self.structure
        # Ensure that set_structure doesn't alter parent
        parent = struct1.parent

        # Write full model to temp file
        self.io.set_structure(struct1)
        self.assertIs(parent, struct1.parent)

        filename = self._temp_filename()
        self.io.save(filename)

        struct2 = self.parser.get_structure("1a8o", filename)
        nresidues = len(list(struct2.get_residues()))

        self.assertEqual(len(struct2), 1)
        self.assertEqual(nresidues, 158)

    def test_pdbio_write_preserve_numbering(self):
        """Write with the default settings: atom serials are renumbered from 1."""
        # The method name is kept for compatibility; the default is
        # preserve_atom_numbering=False.
        self.io.set_structure(self.structure)

        filename = self._temp_filename()
        self.io.save(filename)  # default preserve_atom_numbering=False

        struct = self.parser.get_structure("1a8o", filename)
        serials = [a.serial_number for a in struct.get_atoms()]
        og_serials = list(range(1, len(serials) + 1))

        self.assertEqual(og_serials, serials)

    def test_pdbio_write_auto_numbering(self):
        """Write with preserve_atom_numbering=True: atom serials are kept."""
        # The method name is kept for compatibility.
        self.io.set_structure(self.structure)

        filename = self._temp_filename()
        self.io.save(filename, preserve_atom_numbering=True)

        struct = self.parser.get_structure("1a8o", filename)
        serials = [a.serial_number for a in struct.get_atoms()]
        og_serials = [a.serial_number for a in self.structure.get_atoms()]

        self.assertEqual(og_serials, serials)

    def test_pdbio_write_residue(self):
        """Write a single residue using PDBIO."""
        struct1 = self.structure
        residue1 = list(struct1.get_residues())[0]
        # Ensure that set_structure doesn't alter parent
        parent = residue1.parent

        # Write the residue to temp file
        self.io.set_structure(residue1)
        self.assertIs(parent, residue1.parent)

        filename = self._temp_filename()
        self.io.save(filename)

        struct2 = self.parser.get_structure("1a8o", filename)
        nresidues = len(list(struct2.get_residues()))

        self.assertEqual(nresidues, 1)

    def test_pdbio_write_residue_w_chain(self):
        """Write a single residue (chain id == X) using PDBIO."""
        struct1 = self.structure.copy()  # make copy so we can change it
        residue1 = list(struct1.get_residues())[0]

        # Modify parent id
        parent = residue1.parent
        parent.id = "X"

        # Write the residue to temp file
        self.io.set_structure(residue1)

        filename = self._temp_filename()
        self.io.save(filename)

        struct2 = self.parser.get_structure("1a8o", filename)
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(nresidues, 1)

        # Assert chain remained the same
        chain_id = [c.id for c in struct2.get_chains()][0]
        self.assertEqual(chain_id, "X")

    def test_pdbio_write_residue_wout_chain(self):
        """Write a single orphan residue using PDBIO."""
        # Work on a copy: detaching a residue of the shared structure would
        # leak into whichever tests run afterwards.
        struct1 = self.structure.copy()
        residue1 = list(struct1.get_residues())[0]

        residue1.parent = None  # detach residue

        # Write the residue to temp file
        self.io.set_structure(residue1)

        filename = self._temp_filename()
        self.io.save(filename)

        struct2 = self.parser.get_structure("1a8o", filename)
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(nresidues, 1)

        # Assert chain is default: "A"
        chain_id = [c.id for c in struct2.get_chains()][0]
        self.assertEqual(chain_id, "A")

    def test_pdbio_write_custom_residue(self):
        """Write a chainless residue using PDBIO."""
        res = Residue.Residue((" ", 1, " "), "DUM", "")
        atm = Atom.Atom("CA", [0.1, 0.1, 0.1], 1.0, 1.0, " ", "CA", 1, "C")
        res.add(atm)

        # Ensure that set_structure doesn't alter parent
        parent = res.parent

        # Write the residue to temp file
        self.io.set_structure(res)
        self.assertIs(parent, res.parent)

        filename = self._temp_filename()
        self.io.save(filename)

        struct2 = self.parser.get_structure("res", filename)
        latoms = list(struct2.get_atoms())
        self.assertEqual(len(latoms), 1)
        self.assertEqual(latoms[0].name, "CA")
        self.assertEqual(latoms[0].parent.resname, "DUM")
        self.assertEqual(latoms[0].parent.parent.id, "A")

    def test_pdbio_select(self):
        """Write a selection of the structure using a Select subclass."""

        # Selection class to filter all alpha carbons
        class CAonly(Select):
            """Accepts only CA atoms."""

            def accept_atom(self, atom):
                if atom.name == "CA" and atom.element == "C":
                    return 1

        struct1 = self.structure
        # Ensure that set_structure doesn't alter parent
        parent = struct1.parent

        # Write to temp file
        self.io.set_structure(struct1)
        self.assertIs(parent, struct1.parent)

        filename = self._temp_filename()
        self.io.save(filename, CAonly())

        struct2 = self.parser.get_structure("1a8o", filename)
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(nresidues, 70)

    def test_pdbio_missing_occupancy(self):
        """Write PDB file with missing occupancy."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = self.parser.get_structure("test", "PDB/occupancy.pdb")

        self.io.set_structure(structure)
        filename = self._temp_filename()

        # Saving must emit exactly one warning.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always", BiopythonWarning)
            self.io.save(filename)
            self.assertEqual(len(w), 1, w)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            struct2 = self.parser.get_structure("test", filename)

        atoms = struct2[0]["A"][(" ", 152, " ")]
        self.assertIsNone(atoms["N"].get_occupancy())

    def test_pdbio_write_truncated(self):
        """Test parsing of truncated lines."""
        struct = self.structure

        # Write to temp file
        self.io.set_structure(struct)
        filename = self._temp_filename()
        self.io.save(filename)

        # Check if there are lines besides 'ATOM', 'TER' and 'END'
        with open(filename) as handle:
            record_set = {l[0:6] for l in handle}

        # The slice is six characters wide, so record names are padded to
        # six characters (a bare "TER"/"END" line ends in a newline).
        record_set -= {
            "ATOM  ",
            "HETATM",
            "MODEL ",
            "ENDMDL",
            "TER\n",
            "TER   ",
            "END\n",
            "END   ",
        }
        self.assertEqual(len(record_set), 0)

    def test_model_numbering(self):
        """Preserve model serial numbers during I/O."""

        def confirm_numbering(struct):
            self.assertEqual(len(struct), 3)
            for idx, model in enumerate(struct):
                self.assertEqual(model.serial_num, idx + 1)
                self.assertEqual(model.serial_num, model.id + 1)

        def confirm_single_end(fname):
            """Ensure there is only one END statement in multi-model files."""
            with open(fname) as handle:
                end_stment = []
                for iline, line in enumerate(handle):
                    if line.strip() == "END":
                        end_stment.append((line, iline))

            self.assertEqual(len(end_stment), 1)  # Only one?
            self.assertEqual(end_stment[0][1], iline)  # Last line of the file?

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            struct1 = self.parser.get_structure("1lcd", "PDB/1LCD.pdb")

        confirm_numbering(struct1)

        # Round trip: serialize and parse again
        self.io.set_structure(struct1)
        filename = self._temp_filename()
        self.io.save(filename)

        struct2 = self.parser.get_structure("1lcd", filename)
        confirm_numbering(struct2)
        confirm_single_end(filename)

    def test_pdbio_write_x_element(self):
        """Write a structure with atomic element X with PDBIO."""
        # Copy so the shared structure keeps its real elements.
        struct1 = self.structure.copy()

        # Change element of one atom
        atom = next(struct1.get_atoms())
        atom.element = "X"  # X is assigned in Atom.py as last resort

        self.io.set_structure(struct1)

        filename = self._temp_filename()
        self.io.save(filename)

    def test_pdbio_write_unk_element(self):
        """PDBIO raises ValueError when writing unrecognised atomic elements."""
        # Copy so the invalid element cannot break tests that run later.
        struct1 = self.structure.copy()

        atom = next(struct1.get_atoms())
        atom.element = "1"

        self.io.set_structure(struct1)

        filename = self._temp_filename()
        with self.assertRaises(ValueError):
            self.io.save(filename)