def scwrl(self, altseq):
    """ Repacks sidechains using SCWRL4 and returns a copy

    The alternate sequence and the current structure are written to scratch
    files under ``temp/``, the ``scwrl`` executable is run on them, and its
    output is parsed into a new PDBMapStructure.

    altseq -- sequence text written verbatim to the SCWRL sequence file

    All scratch files are removed before returning, including when SCWRL or
    the parser fails.
    """
    import warnings  # local import: used only to scope the warning filter

    # Same call order as before so the random names are drawn identically.
    seqfname = "temp/%d.txt" % multidigit_rand(10)
    structfile = "temp/%d.pdb" % multidigit_rand(10)
    scwrlfile = structfile + ".scwrl"
    try:
        # Close (and therefore flush) the sequence file before SCWRL reads it.
        with open(seqfname, 'wb') as seqfile:
            seqfile.write(altseq)
        io = PDBIO()
        io.set_structure(self.structure)
        io.save(structfile)
        cmd = ["scwrl", "-0", "-i", structfile, '-s', seqfname, '-o', scwrlfile]
        # Parenthesised single argument prints identically on Python 2 and 3.
        print("\n%s" % ' '.join(cmd))
        sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE).communicate()
        p = PDBParser()
        # catch_warnings restores the caller's filters afterwards;
        # resetwarnings() would have discarded every filter in the process.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=PDBConstructionWarning)
            s = p.get_structure(self.id, scwrlfile)
        return PDBMapStructure(s, pdb2pose={}, refseq=self.refseq)
    finally:
        for fname in (structfile, scwrlfile, seqfname):
            if os.path.exists(fname):
                os.remove(fname)
def test_pdbio_write_pqr_structure(self):
    """Round-trip a complete structure through PDBIO in PQR mode."""
    reference = self.example_structure

    # PQR-mode writer loaded with the reference structure.
    writer = PDBIO(is_pqr=True)
    writer.set_structure(reference)

    # Only the path is needed, so the descriptor is closed straight away.
    descriptor, tmp_path = tempfile.mkstemp()
    os.close(descriptor)
    try:
        writer.save(tmp_path)
        reparsed = self.pqr_parser.get_structure("1a8o", tmp_path)

        # Structure length must survive the round trip.
        self.assertEqual(len(reparsed), len(reference))

        # So must the residue count.
        expected_count = len(list(reference.get_residues()))
        actual_count = len(list(reparsed.get_residues()))
        self.assertEqual(actual_count, expected_count)

        # Atoms are compared pairwise, in iteration order.
        expected_atoms = reference.get_atoms()
        for written_atom in reparsed.get_atoms():
            self.assertEqual(written_atom, next(expected_atoms))
    finally:
        os.remove(tmp_path)
def splitOnePDB(fname, outPath):
    """Split one PDB file into ligand/receptor PDB files.

    The file is parsed with the module-level ``parser``. Chains of model 0
    whose effective length falls outside [MIN_SEQ_LEN, MAX_SEQ_LEN] are
    detached. For every remaining chain selected as a ligand, two files are
    written to ``outPath``: ``<prefix>-<i>_l_u.pdb`` (the ligand chain) and
    ``<prefix>-<i>_r_u.pdb`` (all other chains), where ``<prefix>`` is the
    file's base name up to the first dot.

    Returns 0 when the file cannot be parsed, when model 0 raises KeyError,
    or when fewer than two chains remain; otherwise returns None.
    """
    try:
        s= parser.get_structure(fname, fname)
    except Exception:
        print ("Error loading pdb")
        return 0
    banLenChains=[]
    try:
        for chain in s[0]:
            # Count residues that are not standard amino acids.
            badResInChain=0
            for res in chain.get_list():
                if not is_aa(res,standard=True):
                    badResInChain+=1
            # Effective length: residues having a CA atom, minus the non-standard ones.
            chainLen= sum(1 for res in chain if "CA" in res) - badResInChain
            if chainLen < MIN_SEQ_LEN or chainLen > MAX_SEQ_LEN:
                print(chainLen)
                banLenChains.append(chain.get_id())
    except KeyError:
        print ("Not good model")
        return 0
    # Detach only after the scan, so the model is not modified while iterated.
    for badChainId in banLenChains:
        s[0].detach_child(badChainId)
    receptorChainList= []
    ligandChainList= []
    if len( s[0].get_list())<2:
        print(s)
        print( s[0].get_list())
        print("Not enough good chains")
        return 0
    for chain1 in s[0]:
        # Every other chain is a receptor candidate for chain1.
        tmpReceptorList=[]
        for chain2 in s[0]:
            if chain1!= chain2:
                tmpReceptorList.append(chain2)
        # With exactly two chains, skip the pair whose single partner was
        # already used as a ligand (it would duplicate the first pair, swapped).
        if len(tmpReceptorList)>1 or not tmpReceptorList[0] in ligandChainList:
            ligandChainList.append(chain1)
            receptorChainList.append(tmpReceptorList)
    prefix= os.path.basename(fname).split(".")[0]
    for i, (ligandChain, receptorChains) in enumerate(zip(ligandChainList, receptorChainList)):
        # Ligand file: a fresh structure/model holding just this chain.
        io=PDBIO()
        ligandStruct= Structure(prefix+"ligand")
        ligandStruct.add(Model(0))
        # The chain is re-parented to the new model before being added to it.
        ligandChain.set_parent(ligandStruct[0])
        ligandStruct[0].add(ligandChain)
        io.set_structure(ligandStruct)
        io.save(os.path.join(outPath,prefix+"-"+str(i)+"_l_u.pdb"))
        # Receptor file: a fresh structure/model holding all partner chains.
        io=PDBIO()
        receptorStruct= Structure(prefix+"receptor")
        receptorStruct.add(Model(0))
        for receptorChain in receptorChains:
            receptorChain.set_parent(receptorStruct[0])
            receptorStruct[0].add(receptorChain)
        io.set_structure(receptorStruct)
        io.save(os.path.join(outPath,prefix+"-"+str(i)+"_r_u.pdb"))
        print( "ligand:", ligandChain, "receptor:",receptorChains )
def collect_1(self, checkboard=None):
    """Split the files listed in ``checkboard`` into one PDB file per chain.

    For every name in ``self.files`` that is also in ``checkboard``, the
    structure is read from ``self.inpath``, chains 'A' and 'B' of model 0 are
    saved under ``self.outpath/<name>/``, ``self.then_do(name)`` is called,
    and the name is removed from ``checkboard``.

    checkboard -- list of file names still to process. It is modified in
                  place. Processing stops as soon as it is empty; the default
                  (None) therefore processes nothing, exactly like the former
                  ``[]`` default but without sharing one list between calls.
    """
    def getChains(s):
        # File names look like "<chainA>___<chainB>___..."; keep the first two parts.
        ret = s.split('___')[:2]
        assert(len(ret) == 2)
        return ret

    parser = PDBParser()
    io = PDBIO()
    for f in self.files:
        if not checkboard:
            break
        if f not in checkboard:
            continue
        try:
            os.mkdir(os.path.join(self.outpath, f))
        except OSError as e:
            # An already existing output directory is fine; anything else is not.
            if e.errno != errno.EEXIST:
                raise
        structure = parser.get_structure(f, os.path.join(self.inpath, f))
        chain_A, chain_B = getChains(f)
        io.set_structure(structure[0]['A'])
        io.save(os.path.join(self.outpath, f, chain_A + '.pdb'))
        io.set_structure(structure[0]['B'])
        io.save(os.path.join(self.outpath, f, chain_B + '.pdb'))
        # Hook so this module can be reused by other applications.
        self.then_do(f)
        # Remove the finished file from checkboard.
        checkboard.remove(f)
def write_PDB(entity, file, pdbid=None, chainid=None):
    """Write PDB file with HEADER and TITLE.

    entity  -- entity to write; its ``level`` must be 'S'
    file    -- file name or open handle, passed to ``as_handle``
    pdbid   -- id used in the HEADER line; defaults to the header's 'idcode'
    chainid -- accepted for interface compatibility; not used here

    Raises PDBException when ``entity.level`` is not 'S', and Exception when
    a KeyError occurs while writing.
    """
    with as_handle(file, 'w') as fp:
        try:
            if 'S' != entity.level:
                raise PDBException("level not 'S': " + str(entity.level))

            # An entity without a header dict is still writable: just skip
            # the HEADER/TITLE records.
            header = getattr(entity, 'header', None)
            if header is not None:
                if not pdbid:
                    pdbid = header.get('idcode', None)
                hdr = header.get('head', None)
                dd = header.get('deposition_date', None)
                if hdr:
                    fp.write(('HEADER {:40}{:8} {:4}\n'
                              ).format(hdr.upper(), (dd or ''), (pdbid or '')))
                nam = header.get('name', None)
                if nam:
                    fp.write('TITLE ' + nam.upper() + '\n')

            io = PDBIO()
            io.set_structure(entity)
            io.save(fp)
        except KeyError:
            # The message used to name write_PIC, a different function.
            raise Exception(
                "write_PDB: argument is not a Biopython PDB Entity " +
                str(entity))
def cut_7_helix(self, source_pdb, target_folder, offset=0):
    """Save the atoms accepted by ``self.TMSelect`` to ``<code>_tm.pdb``.

    ``code`` is taken from characters [-8:-4] of ``source_pdb``. The first
    two residues of the chain with id ' ' in model 0 supply the z-coordinates
    of an upper plane (atom 'O') and a lower plane (atom 'N'), each moved
    outwards by ``offset``. The output path is ``target_folder + code +
    '_tm.pdb'`` by plain concatenation.
    """
    pdb_code = source_pdb[-8:-4]
    structure = PDBParser(QUIET=True).get_structure(pdb_code, source_pdb)
    model = structure[0]
    marker_residues = list(model[' '].get_residues())

    def z_of(atom_name, preferred, fallback):
        # Read the atom from the preferred residue; fall back to the other
        # one when that residue has no such atom.
        try:
            atom = marker_residues[preferred][atom_name]
        except KeyError:
            atom = marker_residues[fallback][atom_name]
        return atom.get_coord()[2]

    upper_z = z_of('O', 1, 0) + offset
    lower_z = z_of('N', 0, 1) - offset

    tm_chain_id = self.get_TM_chain(list(model), upper_z, lower_z)
    tm_chain = model[tm_chain_id]

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(target_folder + pdb_code + '_tm.pdb',
                self.TMSelect(upper_z, lower_z, tm_chain))
def write_PDB(entity: Structure, file: str, pdbid: str = None, chainid: str = None) -> None:
    """Write PDB file with HEADER and TITLE.

    :param entity: entity to write; its ``level`` must be "S"
    :param file: file name or open handle, passed to ``as_handle``
    :param pdbid: id for the HEADER line; defaults to the header's "idcode"
    :param chainid: accepted for interface compatibility; not used here
    :raises PDBException: if ``entity.level`` is not "S"
    :raises Exception: if a KeyError occurs while writing
    """
    enumerate_atoms(entity)

    with as_handle(file, "w") as fp:
        try:
            if "S" != entity.level:
                raise PDBException("level not 'S': " + str(entity.level))

            # HEADER/TITLE records are optional: only written when the
            # entity carries a header dict.
            if hasattr(entity, "header"):
                if not pdbid:
                    pdbid = entity.header.get("idcode", None)
                hdr = entity.header.get("head", None)
                dd = pdb_date(entity.header.get("deposition_date", None))
                if hdr:
                    fp.write(("HEADER {:40}{:8} {:4}\n").format(
                        hdr.upper(), (dd or ""), (pdbid or "")))
                nam = entity.header.get("name", None)
                if nam:
                    fp.write("TITLE " + nam.upper() + "\n")

            io = PDBIO()
            io.set_structure(entity)
            # Keep the serial numbers assigned by enumerate_atoms above.
            io.save(fp, preserve_atom_numbering=True)
        except KeyError as err:
            # The message used to name write_PIC, a different function.
            raise Exception(
                "write_PDB: argument is not a Biopython PDB Entity "
                + str(entity)) from err
def CreatePDB(self, coordArray, fPath, ofile):
    """Write a copy of the PDB file ``fPath`` with replaced atom coordinates.

    coordArray -- indexable sequence of rows; row ``n`` supplies x, y, z
                  (elements 0, 1, 2) for the n-th atom of a model, in the
                  order models yield their atoms. Indexing restarts at row 0
                  for every model.
    fPath      -- path of the template PDB file (parsed permissively)
    ofile      -- path of the PDB file to write

    Each atom receives exactly one row. The previous implementation wrapped
    the per-atom loop in a second loop over the same atoms, so coordinates
    were re-applied and the row index no longer tracked the atom order.
    """
    sloppyparser = PDBParser(PERMISSIVE=True, QUIET=True)
    structure = sloppyparser.get_structure("MD_system", fPath)
    print("\nGenerating PDB file...")

    for model in structure:
        # get_atoms() walks chains, then residues, then atoms, in file order.
        for counter, atom in enumerate(model.get_atoms()):
            row = coordArray[counter]
            atom.set_coord(
                np.array((float(row[0]), float(row[1]), float(row[2]))))

    io = PDBIO()
    io.set_structure(structure)
    io.save(ofile)
    print("Transform file written to: " + ofile)
def main(pdbfile, scheme, outfile):
    """Renumber the residues of a PDB file from ANARCI output.

    The sequence(s) of ``pdbfile`` are written to ``<stem>.fasta`` in the
    current directory with ``pdb_tofasta``, numbered by ``anarci`` using
    ``scheme``, and the resulting residue ids are applied chain by chain.
    The renumbered structure is saved to ``outfile``.
    """
    pdb_io = PDBIO()
    parser = PDBParser()
    structure = parser.get_structure('self', pdbfile)
    model = structure[0]
    # Chain currently being read from the ANARCI output; ' ' until a header names one.
    chain = ' '
    # Maps chain key -> list of (hetfield, resseq, icode) ids in ANARCI order.
    anarci_dict = {}
    fastafile = Path(pdbfile).stem + '.fasta'
    with open(fastafile, 'w+') as ff:
        subprocess.run(['pdb_tofasta', '-multi', pdbfile], stdout=ff)
    # stderr is merged into stdout, so diagnostics end up in the parsed lines too.
    out = subprocess.run(['anarci', '-i', fastafile, '--scheme', scheme], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    output = out.stdout.decode("utf-8").splitlines()
    for line in output:
        l = line.strip()
        li = line.split()
        if line.startswith('#'):
            # A comment line containing "PDB" starts a new chain; the chain
            # key is the second '|'-separated field.
            if re.search("PDB", l):
                ch = l.split('|')
                chain = ch[1]
                anarci_dict[chain] = []
        elif line.startswith(chain):
            if len(li) == 3:
                # Three columns: chain, number, residue (no insertion code).
                if li[2] == '-':
                    # Gap position in the numbering scheme: nothing to assign.
                    continue
                else:
                    ch = li[0]
                    resseq = int(li[1])
                    inscode = ' '
                    residue = li[2]
                    het = ' '
                    # NOTE(review): this local name shadows the builtin ``tuple``.
                    tuple = (het, resseq, inscode)
                    anarci_dict[chain].append(tuple)
            elif len(li) == 4:
                # Four columns: chain, number, insertion code, residue.
                ch = li[0]
                resseq = int(li[1])
                inscode = str(li[2])
                residue = li[3]
                het = ' '
                tuple = (het, resseq, inscode)
                anarci_dict[chain].append(tuple)
            else:
                continue
    # Shift every residue number by 900 first -- presumably so the new ids
    # assigned below cannot collide with ids still present in the same chain.
    for residue in structure.get_residues():
        residue.id = (' ', residue.id[1] + 900, residue.id[2])
    for d in anarci_dict:
        count = 0
        for i, residue in enumerate(model[d]):
            if i < len(anarci_dict[d]):
                # Positions covered by ANARCI take the ANARCI id.
                residue.id = anarci_dict[d][i]
                count = residue.id[1]
            else:
                # Remaining residues continue sequentially from the last number.
                count = count + 1
                residue.id = (' ', count, ' ')
    pdb_io.set_structure(structure)
    pdb_io.save(outfile)
def run_naccess(model, pdb_file, probe_size=None, z_slice=None,
                naccess='naccess', temp_path='/tmp/'):
    """Run NACCESS and return the lines of its .rsa and .asa output files.

    model      -- used only when ``pdb_file`` is empty: its parent structure
                  is written to a temporary PDB file
    pdb_file   -- path of an existing PDB file to analyse (copied, not modified)
    probe_size -- optional value for the ``-p`` option
    z_slice    -- optional value for the ``-z`` option
    naccess    -- NACCESS executable name or path
    temp_path  -- directory in which a private scratch directory is created

    Returns ``(rsa_data, asa_data)``, two lists of lines. Anything NACCESS
    writes to stderr is re-issued as a warning. Raises Exception when either
    output file is missing. The scratch directory is always removed and the
    caller's working directory is never changed.
    """
    tmp_path = tempfile.mkdtemp(dir=temp_path)
    try:
        # NACCESS requires the input file name to end with '.pdb'.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            shutil.copy(os.path.abspath(pdb_file), tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        command = [naccess, tmp_pdb_file]
        # Popen needs string arguments; callers may pass numbers.
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into its working directory, so run the child there
        # instead of changing the directory of this process.
        p = subprocess.Popen(command, universal_newlines=True,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             cwd=tmp_path)
        out, err = p.communicate()

        rsa_file = tmp_pdb_file[:-4] + '.rsa'
        asa_file = tmp_pdb_file[:-4] + '.asa'
        # Alert user for errors
        if err.strip():
            warnings.warn(err)
        if (not os.path.exists(rsa_file)) or (not os.path.exists(asa_file)):
            raise Exception('NACCESS did not execute or finish properly.')

        with open(rsa_file) as rf:
            rsa_data = rf.readlines()
        with open(asa_file) as af:
            asa_data = af.readlines()
    finally:
        # The results are in memory by now; never leave the scratch dir behind.
        shutil.rmtree(tmp_path, ignore_errors=True)
    return rsa_data, asa_data
def save_chain_to(chain, filename: str):
    """Write ``chain`` to ``filename`` through Bio.PDB's PDBIO.

    The chain is first added to a new ``Structure`` whose id is the file
    name, and that structure is what gets saved.
    """
    from Bio.PDB.PDBIO import PDBIO

    # Earlier variant, kept for reference: io.set_structure(chain.get_bio_chain())
    wrapper = Structure(filename)
    wrapper.add(chain)

    writer = PDBIO()
    writer.set_structure(wrapper)
    writer.save(filename)
def writeFileBioPDB(struct, fn):
    """Save ``struct`` to ``fn`` with PDBIO; print the error and exit(1) on failure.

    Only the ``save`` call is guarded; the exception text is printed to
    standard output before the process exits with status 1.
    """
    io = PDBIO()
    io.set_structure(struct)
    try:
        io.save(fn)
    except Exception as e:
        # ``except ... as`` and the parenthesised print behave the same on
        # Python 2.6+ and Python 3; the old spelling was a Py3 SyntaxError.
        print(" " + str(e))
        sys.exit(1)
def extract(structure, chain_id, start, end, filename):
    """Save the part of ``structure`` chosen by ChainSelector to ``filename``.

    The selector is built as ``ChainSelector(chain_id, start, end)`` and
    handed to ``PDBIO.save``.
    """
    selector = ChainSelector(chain_id, start, end)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(filename, selector)
def splitOnePDB(fname, chainIdL, chainIdR, outPath):
    """Write the ligand and receptor chains of one PDB file to separate files.

    The file is parsed with the module-level ``parser``. Chains of model 0
    whose effective length (residues with a CA atom, minus residues that are
    neither standard amino acids nor water) is outside
    [MIN_SEQ_LEN, MAX_SEQ_LEN] are counted as rejected. The ligand/receptor
    partition comes from ``findNeigChains``. Output files are
    ``<prefix>-<chainIdL><chainIdR>_l_u.pdb`` and ``..._r_u.pdb`` in
    ``outPath``.

    Returns 0 on a parse error, on KeyError while scanning model 0, or when
    fewer than two chains pass the length filter; otherwise returns None.
    """
    baseName = os.path.basename(fname)
    print(baseName)
    try:
        s = parser.get_structure(baseName, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    rejectedChainIds = []
    try:
        for chain in s[0]:
            nNonStandard = sum(
                1 for res in chain.get_list()
                if not is_aa(res, standard=True) and res.resname != "HOH")
            nWithCA = sum(1 for res in chain if "CA" in res)
            chainLen = nWithCA - nNonStandard
            if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
                print(chain, chainLen)
                rejectedChainIds.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    # Rejected chains are only counted here; they stay in the model.
    if len(s[0].get_list()) - len(rejectedChainIds) < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    ligandChains, receptorChains = findNeigChains(s, chainIdL, chainIdR)
    print("ligand:", ligandChains, "receptor:", receptorChains)
    prefix = baseName.split(".")[0]

    def writePartner(role, partnerChains, suffix):
        # Move the chains into a fresh single-model structure and save it.
        writer = PDBIO()
        partner = Structure(prefix + role)
        partner.add(Model(0))
        for partnerChain in partnerChains:
            partnerChain.set_parent(partner[0])
            partner[0].add(partnerChain)
        writer.set_structure(partner)
        writer.save(
            os.path.join(outPath, prefix + "-" + chainIdL + chainIdR + suffix))

    writePartner("ligand", ligandChains, "_l_u.pdb")
    writePartner("receptor", receptorChains, "_r_u.pdb")
def filter_pdb(input_path, output_path, chain):
    """Copy a PDB file, keeping only what ``ChainSelect(chain)`` accepts.

    The structure id is the stem of ``input_path``.
    """
    structure = PDBParser().get_structure(Path(input_path).stem, input_path)
    selector = ChainSelect(chain)

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_path, select=selector)
def export_structure(structure, name, format):
    """
    Writes the structure into a file. The file can be either a pdb or a mmcif.

    structure -- structure to write
    name      -- output file name, used as given
    format    -- "pdb" or "cif"

    Raises ValueError for any other format. Previously an unknown format
    failed later with an unbound-variable error.
    """
    if format == "pdb":
        io = PDBIO()
    elif format == "cif":
        io = pdb.MMCIFIO()
    else:
        raise ValueError(
            "Unsupported output format %r: expected 'pdb' or 'cif'" % (format,))
    io.set_structure(structure)
    io.save(name)
def pose(self):
    """ Loads the PDBMapStructure as a Rosetta::Pose object

    The structure is written to a temporary ``.pdb`` file, which Rosetta then
    reads. The file is removed afterwards, also when Rosetta fails.
    """
    import_rosetta()
    io = PDBIO()
    io.set_structure(self.structure)
    # Only a file name is needed. The former NamedTemporaryFile('wrb') used a
    # mode string that Python 3 rejects.
    fd, fname = tempfile.mkstemp(suffix='.pdb')
    os.close(fd)
    try:
        io.save(fname)
        pose = rosetta.Pose()
        rosetta.pose_from_pdb(pose, fname)
    finally:
        os.remove(fname)
    return pose
def dssp_dict_from_structure(structure, dssp="dsspcmbi", id=None):
    """
    Create a DSSP dictionary from a file name or from a structure object.

    A file name is passed to DSSP on the command line and is also parsed with
    PDBParser. Any other input is first written to a temporary file.

    @type structure: filename (string), Bio.PDB.Entity, hpf.hddb.db.Structure
    @param dssp: name of the DSSP executable, resolved with ``which``
    @param id: structure id used when parsing a file name (defaults to the
        file name itself); note that this shadows the builtin ``id``
    @return (Bio.PDB.Entity, dssp, keys)
    """
    from subprocess import Popen, PIPE
    from cStringIO import StringIO
    # Resolve the executable to a full path; the assert fails if ``which`` finds nothing.
    dssp = Popen("which %s" % dssp, shell=True, stdout=PIPE).communicate()[0].strip()
    assert os.path.exists(dssp)
    # If we have a structure object we will pipe it into DSSP
    # otherwise it will be loaded and parsed.
    if isinstance(structure, basestring):
        pipe = None
        dssp = "%s %s" % (dssp, structure)
        from Bio.PDB import PDBParser
        structure = PDBParser().get_structure(id if id else structure, structure)
    else:
        # The structure is written to a temporary file that is handed to DSSP
        # as stdin; "--" is appended to the command for that purpose.
        dssp = "%s --" % dssp
        import tempfile
        # NOTE(review): the file is opened write-only ("w") but is later used
        # as the child's stdin -- confirm DSSP can actually read from it.
        pipe = tempfile.NamedTemporaryFile("w")
        from Bio.PDB.Structure import Entity, Structure as BioStructure
        if isinstance(structure, Entity):
            # Climb to the top-level entity so the complete structure is written.
            s = structure
            while s.get_parent() != None:
                s = s.get_parent()
            from Bio.PDB.PDBIO import PDBIO
            io = PDBIO()
            io.set_structure(s)
            io.save(pipe.name)
        else:
            # Otherwise this is of type hpf.hddb.db.Structure
            # avoid importing the class and mapped meta table stuff
            # NOTE(review): leaving this ``with`` block closes ``pipe``, and a
            # NamedTemporaryFile is deleted on close by default -- confirm the
            # ``stdin=pipe`` below still works on this path.
            with pipe as handle:
                pipe.write(structure.text)
            structure = structure.structure
    # execute the process, piping data if necessary
    # print pipe.getvalue()
    out, err = Popen(dssp, shell=True, stdin=pipe if pipe else None, stdout=PIPE, stderr=PIPE).communicate()
    # Wrap the DSSP output in a file-like object for make_dssp_dict.
    io = StringIO()
    io.write(out)
    io.seek(0)
    return (structure,) + make_dssp_dict(io)
def get_pdb_string(pdb_path, select=None):
    """
    Get string representation of a PDB file, filtered using select as in
    Bio.PDB.PDBIO

    pdb_path -- path of the PDB file to read
    select   -- optional Bio.PDB ``Select``-style object. When omitted,
                PDBIO's own default selector is used. Forwarding ``None``
                explicitly, as before, handed PDBIO a selector without the
                ``accept_*`` methods.

    Returns the text PDBIO writes for the (filtered) structure.
    """
    pdb_parser = PDBParser()
    structure = pdb_parser.get_structure('_', pdb_path)
    pdbio = PDBIO()
    pdbio.set_structure(structure)
    # Start from an empty buffer so nothing but PDBIO output can be returned.
    with StringIO() as virtual_pdb_file:
        if select is None:
            pdbio.save(virtual_pdb_file)
        else:
            pdbio.save(virtual_pdb_file, select=select)
        return virtual_pdb_file.getvalue()
def write_pdb(structure, file_name, selector=None, preserve_atom_numbering=False):
    """
    Save ``structure`` as a PDB file called ``file_name``.

    ``selector`` restricts what is written when given; when it is None the
    argument is not passed at all, so PDBIO applies its own default.
    ``preserve_atom_numbering`` is always forwarded to ``PDBIO.save``.
    """
    writer = PDBIO()
    writer.set_structure(structure)

    # Zero or one extra positional argument, depending on the selector.
    extra_args = () if selector is None else (selector,)
    writer.save(file_name, *extra_args,
                preserve_atom_numbering=preserve_atom_numbering)
def run_naccess(model, pdb_file, probe_size=None, z_slice=None,
                naccess='naccess', temp_path='/tmp/'):
    """Run NACCESS and return the lines of its .rsa and .asa output files.

    model      -- used only when ``pdb_file`` is empty: its parent structure
                  is written to a temporary PDB file
    pdb_file   -- path of an existing PDB file to analyse (copied, not modified)
    probe_size -- optional value for the ``-p`` option
    z_slice    -- optional value for the ``-z`` option
    naccess    -- NACCESS executable name or path
    temp_path  -- directory in which a private scratch directory is created

    Returns ``(rsa_data, asa_data)``, two lists of lines. Opening a missing
    output file raises the usual IOError/OSError. The scratch directory is
    always removed and the caller's working directory is never changed.
    """
    # Local imports: this block did not rely on these modules before.
    import shutil
    import subprocess

    # mkdtemp/mkstemp create the names atomically; mktemp only suggested them.
    tmp_path = tempfile.mkdtemp(dir=temp_path)
    try:
        # NACCESS requires the input file name to end with '.pdb'.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            # Resolved against the caller's directory; no shell is involved,
            # so paths with spaces or metacharacters are safe.
            shutil.copy(os.path.abspath(pdb_file), tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        command = [naccess, tmp_pdb_file]
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into its working directory, so run the child there.
        # os.popen3 no longer exists on Python 3; subprocess replaces it.
        process = subprocess.Popen(command, cwd=tmp_path,
                                   universal_newlines=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        process.communicate()

        with open(tmp_pdb_file[:-4] + '.rsa') as rf:
            rsa_data = rf.readlines()
        with open(tmp_pdb_file[:-4] + '.asa') as af:
            asa_data = af.readlines()
    finally:
        shutil.rmtree(tmp_path, ignore_errors=True)
    return rsa_data, asa_data
def save_structure(self, filename, superimpose_structure=None):
    """
    Write the structure returned by get_structure() to a PDB file.

    @param filename: Path of the PDB file to create.
    @type filename: str

    @param superimpose_structure: Structure onto which the sample is meant
        to be superimposed. This body does not use the argument.
    @type superimpose_structure: Structure
    """
    sampled = self.get_structure()
    writer = PDBIO()
    writer.set_structure(sampled)
    writer.save(filename)
def main():
    """Build ``final.pdb`` for the FASTA sequence named on the command line.

    Steps: BLAST the sequence, download and filter a supporting PDB entry
    (written to ``filtered.pdb``), align the target sequence to it, locate
    gaps and insertions on the CA trace, fill them with the parts found, and
    write the completed atoms to ``final.pdb``.

    Exits with status 1 when the argument is missing or is not a ``.fasta``
    file. These cases used to exit with status 0, which hid the failure from
    calling scripts.
    """
    if len(sys.argv) < 2:
        print("Please input fasta filename as an argument for the script")
        sys.exit(1)
    filename = sys.argv[1]
    if not filename.endswith('.fasta'):
        print("Unknown file format (need to be fasta file)")
        sys.exit(1)
    blast_result = blast_sequence(filename)

    pdb_filepath = download_pdb_support_sequence(blast_result)
    filter_pdb(pdb_filepath)
    p = PDBParser(PERMISSIVE=1)
    structure = p.get_structure('file', 'filtered.pdb')
    seqObj = generate_fasta_from_pdb(structure)
    target_seq = SeqIO.read(filename, format="fasta").seq
    alignments = pairwise_alignement(target_seq, seqObj.seq)
    # Only the first alignment is used below.
    aligned_target, aligned_template = alignments[0][0], alignments[0][1]

    # Consider CA atoms only for computing distances
    ca_atoms = [
        atom for model in structure for chain in model for residue in chain
        for atom in residue if atom.get_id() == "CA"
    ]
    gaps = insert_gaps_in_atomlist(ca_atoms, aligned_target)
    insertions = insert_gaps_in_atomlist(ca_atoms, aligned_template)
    print(gaps)
    print(insertions)

    print("Searching for best part for gaps")
    gap_parts = get_gaps_parts(gaps)
    print("Searching for best part for insertions")
    ins_parts = get_gaps_parts(
        insertions, get_insertion_type(aligned_target, aligned_template))

    print("Completing gaps/insertions by found parts")
    complete_atoms = [
        atom for model in structure for chain in model for residue in chain
        for atom in residue
    ]
    complete_atoms = insert_gaps_insertions_in_atomlist(
        complete_atoms, ins_parts, gap_parts)

    print("Writing final pdb file in : final.pdb")
    io = PDBIO()
    io.set_structure(build_structure(complete_atoms))
    io.save('final.pdb')
def generate_output_file(final_model, out_name):
    """
    Save the first model of ``final_model`` under the name ``out_name``.

    ``out_name`` is stripped of surrounding whitespace and gets an extension
    appended. Models with more than 99999 atoms or more than 62 chains are
    written with MMCIFIO as ``<out_name>.cif``; all others are written with
    PDBIO as ``<out_name>.pdb``.
    """
    out_name = str(out_name.strip())
    model = final_model[0]

    # The chain count is only evaluated when the atom count is within limits.
    exceeds_pdb_limits = (len(list(model.get_atoms())) > 99999
                          or len(list(model.get_chains())) > 62)

    if exceeds_pdb_limits:
        writer, extension = MMCIFIO(), ".cif"
    else:
        writer, extension = PDBIO(), ".pdb"
    writer.set_structure(model)
    writer.save(out_name + extension)
def clean(self):
    """Return a cleaned copy of the structure, produced by lib/clean_pdb.py.

    The structure is written to a temporary ``.pdb`` file, and
    ``lib/clean_pdb.py <file> ignorechain nopdbout`` is run on it. The
    script's standard output is parsed as a PDB structure, post-processed
    with PDBMapParser.process_structure(force=True), and wrapped in a new
    PDBMapStructure. The temporary file is always removed and the child
    process is always reaped.
    """
    io = PDBIO()
    io.set_structure(self.structure)
    # Only a file name is needed. The former NamedTemporaryFile('wrb') used a
    # mode string that Python 3 rejects.
    fd, fname = tempfile.mkstemp(suffix='.pdb')
    os.close(fd)
    try:
        io.save(fname)
        cmd = ['lib/clean_pdb.py', fname, 'ignorechain', 'nopdbout']
        logger.info("Shell to: %s" % ' '.join(cmd))
        proc = sp.Popen(cmd, stdout=sp.PIPE)
        try:
            s = PDBParser().get_structure(self.get_id(), proc.stdout)
        finally:
            # Close the pipe and wait so the child does not linger as a zombie.
            proc.stdout.close()
            proc.wait()
    finally:
        os.remove(fname)
    p = lib.PDBMapIO.PDBMapParser()
    s = p.process_structure(s, force=True)
    return PDBMapStructure(s, pdb2pose=self._pdb2pose, refseq=self.refseq)
def main(args):
    """Print the sections of ``args.pdb`` selected via ``args.yaml`` to stdout.

    The structure name is the file stem, with '_Repair' removed when the stem
    ends with it (the naming used by FoldX-repaired PDBs).
    """
    repair_tag = '_Repair'
    pdb_name = Path(args.pdb).stem
    if pdb_name.endswith(repair_tag):
        pdb_name = pdb_name.replace(repair_tag, '')

    structure = PDBParser().get_structure(pdb_name, args.pdb)
    selector = SectionSelecter(import_sections(args.yaml, pdb_name))

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(sys.stdout, select=selector)
def run_naccess(model, pdb_file, probe_size=None, z_slice=None,
                naccess='naccess', temp_path='/tmp/'):
    """Run NACCESS through the shell and return its SIFTS .rsa/.asa lines.

    model      -- used only when ``pdb_file`` is empty: its parent structure
                  is written to a temporary PDB file
    pdb_file   -- path of an existing PDB file to analyse (copied, not modified)
    probe_size -- optional value for the ``-p`` option
    z_slice    -- optional value for the ``-z`` option
    naccess    -- command used to start NACCESS
    temp_path  -- directory in which a private scratch directory is created

    Returns ``(rsa_data, asa_data)``, two lists of lines read from
    ``...SIFTS.rsa`` and ``...SIFTS.asa``. Opening a missing output file
    raises the usual IOError/OSError. The scratch directory is removed on
    success and on failure, and the caller's working directory is never
    changed.
    """
    tmp_path = tempfile.mkdtemp(dir=temp_path)
    try:
        # NACCESS requires the input file name to end with '.pdb'.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            pdb_file = os.path.abspath(pdb_file)
            shutil.copy(pdb_file, tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        command = [naccess, tmp_pdb_file]
        # str() so numeric option values can be joined into the command line.
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into its working directory, so run the child there.
        # NOTE(review): the command is still interpreted by the shell; paths
        # containing spaces or shell metacharacters are not quoted.
        subprocess.call(" ".join(command), shell=True, cwd=tmp_path)

        # NOTE(review): the output name is derived by cutting the last 13
        # characters off the temp file path, so it depends on the length of
        # the random name mkstemp generates -- confirm against the wrapper.
        rsa_file = tmp_pdb_file[:-13] + 'SIFTS.rsa'
        with open(rsa_file) as rf:
            rsa_data = rf.readlines()
        asa_file = tmp_pdb_file[:-13] + 'SIFTS.asa'
        with open(asa_file) as af:
            asa_data = af.readlines()
    finally:
        shutil.rmtree(tmp_path, ignore_errors=True)
    return rsa_data, asa_data
def _temp(self, structure):
    """Return a file path for ``structure``, writing a temp file if needed.

    A string is treated as an existing path and returned unchanged. A
    ``Structure`` is saved with PDBIO into a NamedTemporaryFile, which is
    kept open in ``self._temp_files``, and the temp file's name is returned.
    Any other type is printed and raises TypeError.
    """
    if isinstance(structure, basestring):
        assert(os.path.exists(structure))
        return structure
    elif isinstance(structure, Structure):
        temp = tempfile.NamedTemporaryFile("w")
        # Save the structure in a tempfile
        from Bio.PDB.PDBIO import PDBIO
        io = PDBIO()
        io.set_structure(structure)
        io.save(temp)
        # The caller reads the file by name, so buffered data must be on
        # disk before the name is handed out.
        temp.flush()
        self._temp_files.append(temp)
        return temp.name
    else:
        # Parenthesised single argument prints identically on Python 2 and 3.
        print(type(structure))
        raise TypeError(structure)
def run_naccess(model, pdb_file, probe_size = None, z_slice = None,
                naccess = 'naccess', temp_path = '/tmp/'):
    """Run NACCESS and return the lines of its .rsa and .asa output files.

    model      -- used only when ``pdb_file`` is empty: its parent structure
                  is written to a temporary PDB file
    pdb_file   -- path of an existing PDB file to analyse (copied, not modified)
    probe_size -- optional value for the ``-p`` option
    z_slice    -- optional value for the ``-z`` option
    naccess    -- NACCESS executable name or path
    temp_path  -- directory in which a private scratch directory is created

    Returns ``(rsa_data, asa_data)``, two lists of lines. Opening a missing
    output file raises the usual IOError/OSError. The scratch directory is
    always removed and the caller's working directory is never changed.
    """
    # Local imports: this block did not rely on these modules before.
    import shutil
    import subprocess

    # mkdtemp/mkstemp create the names atomically; mktemp only suggested them.
    tmp_path = tempfile.mkdtemp(dir = temp_path)
    try:
        # NACCESS requires the input file name to end with '.pdb'.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir = tmp_path)
        os.close(handle)
        if pdb_file:
            # Resolved against the caller's directory; no shell is involved,
            # so paths with spaces or metacharacters are safe.
            shutil.copy(os.path.abspath(pdb_file), tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        command = [naccess, tmp_pdb_file]
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into its working directory, so run the child there.
        # os.popen3 no longer exists on Python 3; subprocess replaces it.
        process = subprocess.Popen(command, cwd = tmp_path,
                                   universal_newlines = True,
                                   stdout = subprocess.PIPE,
                                   stderr = subprocess.PIPE)
        process.communicate()

        with open(tmp_pdb_file[:-4] + '.rsa') as rf:
            rsa_data = rf.readlines()
        with open(tmp_pdb_file[:-4] + '.asa') as af:
            asa_data = af.readlines()
    finally:
        shutil.rmtree(tmp_path, ignore_errors = True)
    return rsa_data, asa_data
def remove_residues(self, request, claims):
    """
    Remove residues from a PDB structure

    For a detailed input description see the file:
    mdstudio_structures/schemas/endpoints/removed_residues_request_v1.json
    And for a detailed description of the output see:
    mdstudio_structures/schemas/endpoints/removed_residues_response_v1.json

    Reads ``request['mol']`` (PDB text), ``request['residues']`` (residue
    names, matched in upper case) and the optional ``request['workdir']``.
    Chains left without residues are removed as well.

    Returns ``{'status': 'completed', 'mol': result}``. ``result`` is the
    path ``<workdir>/structure.pdb`` when a workdir was given, otherwise the
    PDB text itself.
    """
    # workdir is optional: only normalise it when it is actually provided.
    if request.get('workdir'):
        request['workdir'] = os.path.abspath(request['workdir'])

    # Parse the structure
    parser = PDBParser(PERMISSIVE=True)
    struc_obj = StringIO(request.get('mol'))
    try:
        structure = parser.get_structure('mol_object', struc_obj)
    finally:
        struc_obj.close()

    to_remove = set(r.upper() for r in request.get('residues', []))
    removed = []
    for model in structure:
        # Iterate over snapshots: detaching a child while iterating the
        # parent itself would skip the element that follows it.
        for chain in list(model):
            for residue in list(chain):
                if residue.get_resname() in to_remove:
                    chain.detach_child(residue.id)
                    removed.append(residue.get_resname())
            if len(chain) == 0:
                model.detach_child(chain.id)

    self.log.info('Removed residues: {0}'.format(','.join(removed)))

    # Save to file or string
    pdbio = PDBIO()
    pdbio.set_structure(structure)

    status = 'completed'
    if request.get('workdir'):
        result = os.path.join(request.get('workdir'), 'structure.pdb')
        pdbio.save(result)
    else:
        outfile = StringIO()
        pdbio.save(outfile)
        outfile.seek(0)
        result = outfile.read()

    return {'status': status, 'mol': result}
def func1():
    """Extract one chain of an mmCIF entry into ``chain1.pdb``.

    Reads the PDB id from ``sys.argv[2]`` and the chain id from
    ``sys.argv[3]``. The file ``<pathmmcif>/<id[1:3]>/<id>.cif.gz`` is
    decompressed to ``pdbprocess.cif`` in the current directory and parsed,
    and the requested chain of model 0 is saved as ``chain1.pdb``.

    Any ordinary error (missing argument, missing file, unknown chain, parse
    failure) prints "FILE NOT FOUND". KeyboardInterrupt and SystemExit are no
    longer swallowed.
    """
    import sys
    import gzip
    import shutil
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.PDBIO import PDBIO

    parser = MMCIFParser(QUIET=True)
    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    try:
        pdb1 = "{}".format(sys.argv[2])
        fol = pdb1[1:3]
        c1 = "{}".format(sys.argv[3])
        pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)
        filename = "pdbprocess.cif"
        # Stream the decompression; both handles are closed even on error.
        with gzip.open(pdbfile, "rb") as tar, open(filename, "wb") as out:
            shutil.copyfileobj(tar, out)
        structure = parser.get_structure(pdb1, filename)
        model = structure[0]
        chain = model[c1]
        io = PDBIO()
        io.set_structure(chain)
        io.save("chain1.pdb")
    except Exception:
        print("FILE NOT FOUND")
def downloadUsingMmtf(pdbId, fnameOut, maxNumberOfChains=MAX_NUMBER_OF_CHAINS):
    """Fetch ``pdbId`` with MMTFParser and save it as a PDB file.

    Returns True when the file was written. Returns False when the structure
    has no model 0 or when any error other than NoValidPDBFile occurs (the
    error is printed). NoValidPDBFile -- raised here when model 0 has more
    than ``maxNumberOfChains`` chains -- is printed and then re-raised.
    """
    print("downloadUsingMmtf")
    try:
        struct = MMTFParser().get_structure_from_url(pdbId)
        if 0 not in struct:
            return False
        nChains = len(struct[0])
        if nChains > maxNumberOfChains:
            raise NoValidPDBFile(
                "The maximun number of allowed chains is %d (%d) for %s" %
                (maxNumberOfChains, nChains, pdbId))
        pdbWriter = PDBIO()
        pdbWriter.set_structure(struct)
        pdbWriter.save(fnameOut)
    except NoValidPDBFile as err:
        # Report, then let the caller deal with it.
        print(err)
        raise
    except Exception as err:
        print(err)
        return False
    return True
def collect_0(self):
    """Split every file in ``self.files`` into one PDB file per chain.

    For each name, the structure is read from ``self.inpath``, and chains
    'A' and 'B' of model 0 are saved under ``self.outpath/<name>/`` as
    ``<part>.pdb``, where the parts are the first two '___'-separated pieces
    of the file name. ``self.then_do(name)`` is called afterwards.
    """
    pdb_parser = PDBParser()
    writer = PDBIO()
    for f in self.files:
        target_dir = os.path.join(self.outpath, f)
        try:
            os.mkdir(target_dir)
        except OSError as e:
            # An already existing directory is acceptable.
            if e.errno != errno.EEXIST:
                raise

        structure = pdb_parser.get_structure(f, os.path.join(self.inpath, f))

        # The first two name parts label chains 'A' and 'B' respectively.
        name_parts = f.split('___')[:2]
        assert(len(name_parts) == 2)

        for chain_id, part in zip(('A', 'B'), name_parts):
            writer.set_structure(structure[0][chain_id])
            writer.save(os.path.join(target_dir, part + '.pdb'))

        self.then_do(f)
def rmsd(pdbid, chain, decoy_file, astral=False):
    """Calculate the RMS between the two structures based on sequence alignment.

    pdbid      -- id passed to hpf.pdb.get_astral or hpf.pdb.get_pdb
    chain      -- chain id; only this chain is written for the comparison
    decoy_file -- path of the decoy structure to align against
    astral     -- when true, the reference is loaded with get_astral

    Returns whatever Mammoth.do_alignment returns for the pair.
    """
    if astral:
        pdb = hpf.pdb.get_astral(pdbid)
    else:
        pdb = hpf.pdb.get_pdb(pdbid)
    temp = tempfile.NamedTemporaryFile("w")
    try:
        # Save the pdbchain
        io = PDBIO()
        io.set_structure(pdb)
        io.save(temp, select=hpf.pdb.PDBChainSelector(chain))
        # Mammoth reads the file by name, so buffered data must reach disk first.
        temp.flush()
        # Perform Mammoth alignment
        cl = Mammoth.MammothCL(temp.name, decoy_file)
        return Mammoth.do_alignment(cl)
    finally:
        temp.close()
def save_structure(self, output_pdb_path: str, mod_id: str = None):
    """
    Write the structure, or a single model of it, to disk in PDB format.

    Args:
        output_pdb_path: OS path to the output file
        mod_id (optional): key of the model in ``self.st`` to write; the
            whole structure is written when omitted

    Errors:
        OutputPathNotProvidedError: ``output_pdb_path`` is empty
        OSError: Error saving the file
    """
    if not output_pdb_path:
        raise OutputPathNotProvidedError

    writer = PDBIO()
    target = self.st if mod_id is None else self.st[mod_id]
    writer.set_structure(target)
    writer.save(output_pdb_path)
def moveAndWriteAsPDBIfMmcif(fnameIn, fnameOut, removeInput=False):
    """Load ``fnameIn`` and rewrite it as a PDB file at ``fnameOut``.

    The residue total of model 0 must lie strictly between the configured
    ``minNumResiduesPartner`` and ``maxNumResiduesPartner``. ``fnameIn`` is
    deleted afterwards when ``removeInput`` is true.

    Returns True on success. Any exception, including the
    BadNumberOfResidues raised for an out-of-range size, is printed and
    turned into a False return value.
    """
    from Config import Configuration
    conf = Configuration()
    minNumResidues = conf.minNumResiduesPartner
    maxNumResidues = conf.maxNumResiduesPartner
    try:
        struct, __ = loadPdbIfIsPath(fnameIn)
        totalNumRes = sum(len(chain.get_list()) for chain in struct[0])
        if totalNumRes <= minNumResidues or totalNumRes >= maxNumResidues:
            raise BadNumberOfResidues(totalNumRes)

        pdbWriter = PDBIO()
        pdbWriter.set_structure(struct)
        pdbWriter.save(fnameOut)
        if removeInput:
            os.remove(fnameIn)
        return True
    except Exception as e:
        print("Error in moveAndWriteAsPDBIfMmcif !!!", e)
        return False
def extract(structure, chain_id, start, end, filename):
    """
    Write a selected stretch of one chain of a structure to a PDB file.

    Args:
        structure: A parsed structure object (see the example below)
        chain_id: Id of the chain to extract from
        start: First selection bound, passed to ChainSelector
        end: Second selection bound, passed to ChainSelector
        filename: Name of the PDB file to write (ex. my_protein.pdb)

    Returns:
        None. The selection is written to ``filename``.

    Example of usage
        p=PDBParser()
        s=p.get_structure('X', '1RUZ.pdb')
        extract(s, 'H', 40, 60, 'extracted.pdb')
    """
    residue_filter = ChainSelector(chain_id, start, end)
    pdb_writer = PDBIO()
    pdb_writer.set_structure(structure)
    pdb_writer.save(filename, residue_filter)
def main(verbose=False, msafile=None, pdbfile=None, renumligs=False):
    """Renumber the residues of a PDB file to the positions of an MSA.

    For every chain of the first model, the chain sequence is aligned (with
    Needle) to each sequence of the FASTA alignment ``msafile``. The MSA
    sequence with the highest 'Longest_Identity' defines the new numbering:
    each residue gets the 1-based MSA column of the position it aligns to.
    Residues without a mapping, and hetero residues, keep their number
    unless ``renumligs`` is set, in which case they get number 0. The result
    is written to ``<querypdbid>_renum.pdb`` in the current directory.

    verbose   -- when False the PDB parser is run with QUIET=True
    msafile   -- multiple sequence alignment in FASTA format
    pdbfile   -- PDB file to renumber
    renumligs -- renumber unmapped/hetero residues to 0
    """
    # Three-letter to one-letter code for the 20 standard amino acids.
    onelettercode = {'ASP':'D', 'GLU':'E', 'ASN':'N', 'GLN':'Q', 'ARG':'R', 'LYS':'K', 'PRO':'P', 'GLY':'G', 'CYS':'C', 'THR':'T', 'SER':'S', 'MET':'M', 'TRP':'W', 'PHE':'F', 'TYR':'Y', 'HIS':'H', 'ALA':'A', 'VAL':'V', 'LEU':'L', 'ILE':'I'}
    wd = os.getcwd()
    msa = AlignIO.read(read(msafile), 'fasta')
    querypdbid = fileprefix(pdbfile)
    querypdb = PDBParser(PERMISSIVE=1, QUIET=(not verbose)).get_structure(querypdbid, pdbfile)
    # For each chain of the first model, collect its sequence as
    # {residue number: one-letter code}; unknown residue names become 'X'.
    qseqdicts = dict()
    chains = dict( (chain.get_id(), chain) for chain in querypdb.get_list()[0].get_list() )
    for chainid in chains:
        qseqdicts[chainid] = dict()
        residuelist = chains[chainid].get_list()
        for i, residue in enumerate(residuelist):
            if residue.get_resname() not in onelettercode:
                oletter = 'X'
            else:
                oletter = onelettercode[residue.get_resname()]
            # Keyed by residue number only, so residues sharing a number
            # (e.g. differing in insertion code) overwrite each other.
            qseqdicts[chainid][int(residue.get_id()[1])] = oletter
    # Get the sequences as strings and process one chain at a time.
    for chainid in qseqdicts:
        print "\nCurrent chain is '%s'." % chainid
        qseqdict = qseqdicts[chainid]
        qseq = "".join( [qseqdict[resnr] for resnr in sorted(qseqdict.keys())] )
        # Residue numbers in the same (sorted) order as the letters of qseq.
        qseqkeys = [resnr for resnr in sorted(qseqdict.keys())]
        qseq = SeqRecord(Seq(qseq,IUPAC.protein),id=querypdbid)
        # determine the sequence closest to the one in the PDB file
        i=0
        maxid = 0 # maximum sequence identity
        maxal = maxmsaseq = None
        # align qseq to each seq in the msa and find the one with highest identity
        for alignment in msa:
            msaseq = SeqRecord(alignment.seq, id=alignment.id)
            ids = (qseq.id, msaseq.id)
            ali = Needle(pair=ids,records=(qseq, msaseq), name=ids, confopts={'gapopen':10, 'gapextend':0, 'brief':0}) # wd='/tmp/needle'
            ali.align(force_protein=True)
            seqid = NeedleSingle(outfile=ali.outfile).getResults()['Longest_Identity']
            if seqid > maxid:
                maxid = seqid
                maxal = AlignIO.read(ali.outfile, 'emboss')
                maxmsaseq = msaseq
        try:
            # maxal is still None when no alignment beat identity 0; indexing
            # None raises TypeError, which skips this chain.
            qseq = str(maxal[0].seq)
            tseq = str(maxal[1].seq)
            msaseq = str(maxmsaseq.seq)
            print "Highest seq id is %.2f for:\n%s\n================" % (maxid, str(maxal))
        except TypeError:
            print "No alignment could be generated."
            continue
        # number letters in the msa sequence
        # get the positions without gaps, this gives the actual positions in tseq
        msaseqindices = [ i for i in range(1,len(msaseq)+1) if msaseq[i-1] != '-' ]
        # get positions with gaps
        # qseqgappos = [ i for i in range(len(qseq)) if tseq[i] == '-' ]
        # qseqgappos.reverse()
        # qseqkeys = range(len(qseq))
        # for i in qseqgappos:
        #     del qseqkeys[i]
        # trim qseq if some pos there are mapped to gaps in tseq
        # expand the list of qseqkeys to align with MSA
        tmp = list(qseqkeys)
        qseqkeys = []
        # Reversed so that pop() hands out the residue numbers in ascending order.
        tmp.reverse()
        for i in range(len(qseq)):
            # Every non-gap column of the aligned query consumes one residue number.
            if qseq[i] != '-':
                key = tmp.pop()
            else:
                key = None
            # Keep only columns where the MSA sequence has a letter.
            if tseq[i] != '-':
                qseqkeys.append(key)
        qseq = [ qseq[i] for i in range(len(qseq)) if tseq[i] != '-' ]
        # and trim gaps from tseq
        tseq = [ tseq[i] for i in range(len(tseq)) if tseq[i] != '-' ]
        # now, tseq is guaranteed gap-free
        #replace each position in qseqindices by None if there is a gap in qseq
        newqseqindices = list(msaseqindices)
        for i in range(len(qseq)):
            if qseq[i] == '-':
                newqseqindices[i] = None
        #print newqseqindices
        # now newqseqindices is the list of indices for the renumbering in the order of the query sequence
        #sanity check
        assert(len(qseqkeys) == len(qseq) == len(tseq) == len(newqseqindices))
        for i in range(len(qseqkeys)):
            if qseqkeys[i] is None:
                assert(newqseqindices[i] is None)
        #remove empty positions and create a dict of the remapping
        newqseqindices = [index for index in newqseqindices if index is not None]
        qseqkeys = [index for index in qseqkeys if index is not None]
        # Final mapping: old residue number -> MSA column number.
        newqseqindices = dict(zip(qseqkeys,newqseqindices))
        #renumber residues
        chain = chains[chainid]
        for residue in chain.get_list():
            resid = list(residue.get_id())
            index = resid[1]
            # A non-blank first id field marks a hetero residue.
            is_het = len(resid[0].strip()) != 0
            if index in newqseqindices and not is_het:
                resid[1] = newqseqindices[index]
            elif renumligs:
                resid[1] = 0
            residue.id = tuple(resid)
    # write renumbered PDB file
    io = PDBIO()
    io.set_structure(querypdb)
    io.save(os.path.join(wd, '%s_renum.pdb' % querypdbid))
def main():
    """Write an ensemble PDB file for each placement of a second molecule.

    Reads two PDB file paths from the module-level ``args`` sequence:
    ``args[0]`` is the molecule to be centered and ``args[1]`` is the molecule
    that is moved around it.  Both structures are re-centered with
    ``translate_molecule(..., center_molecule(center_of_mass))``; the second
    one is then offset by ``[45, 0, 0]`` transformed by ``genMatrix`` for
    every entry of ``locations``.  For each placement the merged structure is
    saved as ``struct_<n>.pdb`` in the hard-coded output directory.

    Prints an error and exits with status 1 when fewer than two paths are
    supplied or when either path does not exist.

    Raises:
        ValueError: if a clashing chain of the second structure cannot be
            relabelled because every uppercase letter is already in use.
    """
    # Validate the command-line arguments before touching the file system.
    if not args:
        print("Error: No file path provided.")
        sys.exit(1)
    if len(args) < 2:
        # Previously args[1] was read unchecked and raised IndexError.
        print("Error: No file path provided for molecule to be rotated.")
        sys.exit(1)

    # The program works with two structures.  The first one is centered and
    # the second one is placed around the first in many orientations.
    filepath1 = args[0]
    if not os.path.exists(filepath1):
        print("Error: File path for molecule to be centered does not exist.")
        sys.exit(1)
    filepath2 = args[1]
    if not os.path.exists(filepath2):
        print("Error: File path for molecule to be rotated does not exist.")
        sys.exit(1)
    # TODO We should eventually perform more rigorous validation,
    # e.g. verifying file permissions.

    # Read both structures; put the first at the center and the second at
    # an initial offset of [45, 0, 0].
    structure1 = pdb_parser.get_structure("Centered One", filepath1)
    center_of_mass1 = _calculate_center_of_mass(structure1)
    translate_molecule(structure1, center_molecule(center_of_mass1))
    center_of_mass1 = _calculate_center_of_mass(structure1)
    print(center_of_mass1)

    structure2 = pdb_parser.get_structure("Rotated One", filepath2)
    center_of_mass2 = _calculate_center_of_mass(structure2)
    translate_molecule(structure2, center_molecule(center_of_mass2))
    location_vector2 = [45, 0, 0]
    translate_molecule(structure2, location_vector2)

    # The two structures may share chain identifiers.  Duplicates are not
    # allowed once they are merged, so clashing chains of structure 2 are
    # relabelled.  Ids used by *either* structure are reserved up front so a
    # renamed chain can never land on an id that another chain of structure 2
    # already holds.
    # TODO: There can only be as many labels as letters in the alphabet.
    taken_by_first = set(chain.id for chain in structure1[0])
    reserved = taken_by_first | set(chain.id for chain in structure2[0])
    for chain in list(structure2[0]):
        if chain.id not in taken_by_first:
            continue
        for letter in string.ascii_uppercase:
            if letter not in reserved:
                chain.id = letter
                reserved.add(letter)
                break
        else:
            raise ValueError(
                "No unused uppercase chain identifier left to relabel chain %r"
                % (chain.id,)
            )

    # Join the two structures into one structure with a single model that
    # holds the chains of both.  deepcopy copies structure 1 recursively; the
    # chains of structure 2 are added as the same objects, so moving
    # structure 2 below also moves them inside the merged structure.
    structure3 = copy.deepcopy(structure1)
    structure3.id = "Ensamble"
    for chain in structure2.get_chains():
        structure3[0].add(chain)

    path_dir = "/home/noel/Projects/Protein_design/Insulin/OIPD/pdbs/"
    # DELETED: (45,45,180),(45,45,360),(45,90,180),(45,90,360),(45,135,180),(45,135,360),(45,180,180),(45,180,360)
    locations = [
        (45, 0, 0), (90, 0, 0), (135, 0, 0), (180, 0, 0),
        (225, 0, 0), (270, 0, 0), (315, 0, 0), (360, 0, 0),
        (45, 45, 45), (45, 45, 90), (45, 45, 135),
        (45, 45, 225), (45, 45, 270), (45, 45, 315),
        (45, 90, 45), (45, 90, 90), (45, 90, 135),
        (45, 90, 225), (45, 90, 270), (45, 90, 315),
        (45, 135, 45), (45, 135, 90), (45, 135, 135),
        (45, 135, 225), (45, 135, 270), (45, 135, 315),
        (45, 180, 45), (45, 180, 90), (45, 180, 135),
        (45, 180, 225), (45, 180, 270), (45, 180, 315),
    ]

    # The writer only keeps a reference to the structure, so it can be set up
    # once; every save() call serializes the coordinates current at that time.
    writer = PDBIO()
    writer.set_structure(structure3)
    for structure_id, angles in enumerate(locations):
        # Angles are given in degrees and converted to radians for genMatrix.
        RotMat = genMatrix(
            angles[0] * np.pi / 180,
            angles[1] * np.pi / 180,
            angles[2] * np.pi / 180,
        )
        # Re-center structure 2 first so offsets do not accumulate.
        center_of_mass2 = _calculate_center_of_mass(structure2)
        translate_molecule(structure2[0], center_molecule(center_of_mass2))
        translate_molecule(
            structure2[0],
            list(np.dot(np.asarray(location_vector2), RotMat)),
        )
        writer.save(os.path.join(path_dir, "struct_%d.pdb" % structure_id))
res_id = (' ',num_count,' ') residue = Residue(res_id,'ALA',' ') cur_coord = tuple(points[i]) bfactor = bfactors[i] atom = Atom('CA',cur_coord,bfactor,0,' ','CA',num_count,'C') residue.add(atom) chain.add(residue) model.add(chain) structure.add(model) # -------------------------------------------------------------------- io=PDBIO() io.set_structure(structure) if ( args['dst'] is None): fn = sys.stdout io.save(fn) if ( args['link'] ): for i in range(1,shape(points)[0]): fn.write( "CONECT%5d%5d\n" % (i, i+1)) else: fn = args['dst'] io.save(fn) fout = open(fn,"a") if (args['link'] ): for i in range(1,shape(points)[0]): fout.write( "CONECT%5d%5d\n" % (i, i+1)) fout.close() # print "output file: " + args['dst']
def setup_structures_for_linking(filepath1, filepath2, charmmdir, pdb_path_dir):
    """Write merged PDB files with the second molecule placed around the first.

    Both structures are parsed and re-centered on their center of mass.  For
    each direction on a coarse set of unit vectors, structure 2 is
    transformed with the matrix returned by ``rig.alignVectors(direction,
    center_of_charge)``, translated 45 units along the direction, and the
    merged structure is saved.  Structure 2 is then re-centered for the next
    placement.

    Args:
        filepath1: path of the PDB file that stays centered.
        filepath2: path of the PDB file that is rotated and moved.
        charmmdir: passed unchanged to ``md.ChargeCalculator``.
        pdb_path_dir: prefix prepended verbatim to ``struct_<n>.pdb``; it is
            concatenated as a plain string, so a directory needs its
            trailing separator.

    Raises:
        ValueError: if a clashing chain of the second structure cannot be
            relabelled because every uppercase letter is already in use.
    """
    cmc = md.CenterOfMassCalculator()
    rig = MRM.Molecular_Rigid_Manipulation()

    structure1 = pdb_parser.get_structure('Centered', filepath1)
    rig.translate_molecule(
        structure1,
        rig.center_molecule(cmc.calculate_center_of_mass(structure1)),
    )
    structure2 = pdb_parser.get_structure('Rotated', filepath2)
    rig.translate_molecule(
        structure2,
        rig.center_molecule(cmc.calculate_center_of_mass(structure2)),
    )

    # The two structures may share chain identifiers.  Duplicates are not
    # allowed once they are merged, so clashing chains of structure 2 are
    # relabelled.  Ids used by *either* structure are reserved up front so a
    # renamed chain can never land on an id that another chain of structure 2
    # already holds.
    # TODO: There can only be as many labels as letters in the alphabet.
    taken_by_first = set(chain.id for chain in structure1[0])
    reserved = taken_by_first | set(chain.id for chain in structure2[0])
    for chain in list(structure2[0]):
        if chain.id not in taken_by_first:
            continue
        for letter in string.ascii_uppercase:
            if letter not in reserved:
                chain.id = letter
                reserved.add(letter)
                break
        else:
            raise ValueError(
                "No unused uppercase chain identifier left to relabel chain %r"
                % (chain.id,)
            )

    # Join the two structures into one structure with a single model that
    # holds the chains of both.  deepcopy copies structure 1 recursively; the
    # chains of structure 2 are added as the same objects, so moving
    # structure 2 below also moves them inside the merged structure.
    structure3 = copy.deepcopy(structure1)
    structure3.id = 'Ensamble'
    for chain in structure2.get_chains():
        structure3[0].add(chain)

    # TODO: This works only for angles between 0 and 90, not including 0 and
    # 90, and will generate directions in all 8 octants of the cartesian
    # coordinate system.
    Angle_Separation = 45
    tilt = np.cos(45 * np.pi / 180)
    # Floor division keeps this an int on Python 3 as well; on Python 2 the
    # value is the same as the former "/" on ints.
    repeats = 90 // Angle_Separation - 1
    locations = []
    for z in (0, tilt, -tilt):
        for azimuth in range(0, 360, Angle_Separation):
            for _ in range(0, repeats):
                locations.append([
                    np.cos(azimuth * np.pi / 180),
                    np.sin(azimuth * np.pi / 180),
                    z,
                ])
    # The two poles.
    locations.append([0, 0, 1])
    locations.append([0, 0, -1])
    # Scale every direction to unit length.
    locations = [
        list(np.asarray(vector) / np.linalg.norm(vector))
        for vector in locations
    ]

    for structure_id, direction in enumerate(locations):
        ccc = md.ChargeCalculator(charmmdir)
        center_of_charge2 = ccc.calculate_center_of_charge(structure2)
        center_of_charge2 = center_of_charge2 / np.linalg.norm(center_of_charge2)
        RM = rig.alignVectors(direction, center_of_charge2)
        for atom in structure2.get_atoms():
            atom.set_coord(np.dot(atom.get_coord(), RM))
        # Move structure 2 out 45 units along the current direction.
        rig.translate_molecule(structure2, [k * 45 for k in direction])

        io = PDBIO()
        io.set_structure(structure3)
        io.save(pdb_path_dir + 'struct_' + str(structure_id) + '.pdb')

        # Re-center structure 2 so the next placement starts from the origin.
        rig.translate_molecule(
            structure2,
            rig.center_molecule(cmc.calculate_center_of_mass(structure2)),
        )
structure.add(model) # -------------------------------------------------------------------- # for model in structure: # print # for chain in model: # for residue in chain: # for atom in residue: # print atom.get_full_id() # print atom.coord io=PDBIO() io.set_structure(structure) fn = outputfilename io.save(fn,write_end=False) fout = open(fn,"a") for j in range (0 ,15): for i in range(1,newpoints[j]): fout.write( "CONECT%5d%5d\n" % (i, i+1)) print "output file: " + outputfilename # -------------------------------------------------------------------- # reinitialize # load uniform.pdb # split_states all # zoom # spectrum # as cartoon