def clean_pdb(pdb_file, pdb_chain, out_dir):
    """Write cleaned copies of a PDB file and of one of its chains.

    Two pipelines are run, one on the whole input file and one on the
    single chain ``pdb_chain`` extracted from it.  Each pipeline calls, in
    order, ``removeHetAtoms``, ``renumberResidues`` and ``removeWaters``.
    The results are written to::

        <out_dir>/<PDB_NAME>.pdb
        <out_dir>/chain/<PDB_NAME>_<pdb_chain>.pdb

    where ``PDB_NAME`` is the upper-cased base name of ``pdb_file`` up to
    its first dot.  Both output directories are created when missing.

    Args:
        pdb_file: Path of the input PDB file.
        pdb_chain: Identifier of the chain to extract; used as a key into
            the first model of the parsed structure.
        out_dir: Directory receiving the cleaned whole-structure file; the
            single-chain file goes into its ``chain`` subdirectory.

    Returns:
        None.  When the chain is not present in the structure an error
        message is printed and the function returns without writing any
        PDB file (the output directories have already been created).

    Raises:
        argparse.ArgumentTypeError: If ``pdb_file`` is not an existing file.
    """
    if not os.path.isfile(pdb_file):
        raise argparse.ArgumentTypeError("PDB file could not be found.")

    out_dir_chain = os.path.join(out_dir, 'chain')

    # Create output directories if they do not already exist
    for directory in (out_dir, out_dir_chain):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Grab PDB name and make sure it's converted to uppercase
    pdb_name = os.path.basename(pdb_file).split('.')[0].upper()
    chain_name = pdb_name + '_' + pdb_chain

    structure = parsePDBStructure(pdb_file)

    # Make sure the chain exists before any file is written; the lookup is
    # performed only for its KeyError.
    try:
        structure[0][pdb_chain]
    except KeyError:
        print("\nERROR:\n\n\t"+pdb_name+": chain "+pdb_chain+" could not be found.\n")
        return

    pdb_chain_file = os.path.join(out_dir_chain, chain_name + '_temp.pdb')
    temp_file = os.path.join(out_dir, pdb_name + '_temp.pdb')
    temp_file_chain = os.path.join(out_dir_chain, chain_name + '_temp2.pdb')

    # Every intermediate file is registered here so the ``finally`` clause
    # can delete it even when a later step raises.
    temp_paths = [pdb_chain_file, temp_file, temp_file_chain]

    try:
        # Extract chain of interest
        io = PDBIO()
        chain_select = ChainSelect(pdb_chain)
        io.set_structure(structure)
        io.save(pdb_chain_file, chain_select)

        # Remove HetAtoms
        removeHetAtoms(pdb_file, temp_file)
        removeHetAtoms(pdb_chain_file, temp_file_chain)

        # Renumber PDB.  The first element of the returned pair is used
        # below as a file path (it is read by removeWaters and deleted
        # afterwards); the second element is not needed here.
        # NOTE(review): a file written by renumberResidues before it raises
        # cannot be cleaned up from here because its path is not yet known.
        new_pdb, _ = renumberResidues(parsePDBStructure(temp_file), pdb_name)
        temp_paths.append(new_pdb)
        new_pdb_chain, _ = renumberResidues(
            parsePDBStructure(temp_file_chain), chain_name)
        temp_paths.append(new_pdb_chain)

        # Remove waters
        removeWaters(new_pdb, os.path.join(out_dir, pdb_name + '.pdb'))
        removeWaters(new_pdb_chain,
                     os.path.join(out_dir_chain, chain_name + '.pdb'))
    finally:
        # Clean up temporary files, including after a failure part-way
        # through, when some of them may never have been created.
        for path in temp_paths:
            if os.path.isfile(path):
                os.remove(path)
def main():
    """Print ``<PDB_NAME> , <checkMultimer result>`` for the PDB in argv[1].

    ``PDB_NAME`` is the upper-cased base name of the given path up to its
    first dot.  The structure is parsed with ``parsePDBStructure`` and the
    value returned by ``checkMultimer`` is printed after the name.

    Returns:
        0 on success.

    Raises:
        SystemExit: With the usage text as its message (printed to stderr,
            non-zero exit status) when no path argument was given or the
            path does not exist.
    """
    usage = "\nUsage:\n\n\textract_state.py <pdb>\n\n" \
            "\tThis script prints whether a given PDB is monomeric or multimeric. Must be used with RAW biological assembly for accurate results."

    args = sys.argv
    # Validate before indexing so a missing argument shows the usage text
    # instead of an IndexError.  An explicit check is used rather than
    # ``assert`` because assertions are removed under ``python -O``.
    if len(args) < 2 or not os.path.exists(args[1]):
        sys.exit(usage)

    pdb = args[1]
    pdb_name = os.path.basename(pdb).split('.')[0].upper()
    structure = parsePDBStructure(pdb)

    # Same text as the statement ``print pdb_name, ",", result`` (items
    # separated by single spaces, trailing newline), written in a form
    # that is valid under both Python 2 and Python 3.
    sys.stdout.write("{0} , {1}\n".format(pdb_name, checkMultimer(structure)))
    return 0