def Renumber(self, directory):
    '''
    Renumber every PDB file in ``directory`` in place, starting at 1.

    Each ATOM/HETATM record (and each TER record long enough to carry
    the residue fields) gets a sequential serial number, a sequential
    residue number and chain identifier 'A'. All other lines are
    copied through unchanged. The insertion-code column and everything
    after it are copied from the original line.

    directory: path of the directory; every entry in it is opened as a
               PDB file.
    '''
    print('\x1b[33m[.] Renumbering structures...\x1b[0m')
    for name in tqdm.tqdm(os.listdir(directory)):
        # Build full paths instead of changing the working directory, so
        # an error part-way through cannot leave the process elsewhere.
        path = os.path.join(directory, name)
        temp = path + 'X'
        serial = 0
        resnum = 0
        previous = None
        with open(path, 'r') as source, open(temp, 'w') as target:
            for line in source:
                record = line[:6]
                has_fields = len(line.rstrip('\r\n')) >= 26
                renumber = record in ('ATOM  ', 'HETATM') or (
                    record.startswith('TER') and has_fields)
                if not renumber:
                    # Header/END/etc. records do not have these columns;
                    # pushing them through the formatter corrupts them.
                    target.write(line)
                    continue
                serial += 1
                # Columns 23-27: residue number plus insertion code.
                residue = line[22:27]
                if residue != previous:
                    resnum += 1
                    previous = residue
                # The serial field is columns 7-11 (5 wide); writing the
                # whole field keeps later columns aligned past 9999 atoms.
                target.write(line[:6] + '{:5d}'.format(serial)
                             + line[11:21] + 'A'
                             + '{:4d}'.format(resnum) + line[26:])
        # Swap the renumbered copy over the original in one step.
        os.replace(temp, path)
def Database():
    '''
    Download the full PDB database and clean it up.

    Steps:
      1. rsync the wwPDB "divided" archive into ./DATABASE and move
         every file into a flat ./PDBDatabase directory.
      2. Parse each gzipped entry and save each chain as
         <ID>_<chain>.pdb; the compressed original is deleted whether
         or not extraction succeeded.
      3. Delete chain files from which no peptide can be built, and
         renumber the atoms and residues of the rest from 1, in chain A.

    Output is a directory called PDBDatabase in the current working
    directory. os.mkdir raises FileExistsError if it already exists.
    '''
    import shutil  # local import: only used to drop the rsync mirror

    def renumber(path):
        ''' Rewrite one PDB file in place with sequential numbering '''
        temp = path + 'X'
        serial = 0
        resnum = 0
        previous = None
        with open(path, 'r') as source, open(temp, 'w') as target:
            for line in source:
                record = line[:6]
                has_fields = len(line.rstrip('\r\n')) >= 26
                numbered = record in ('ATOM  ', 'HETATM') or (
                    record.startswith('TER') and has_fields)
                if not numbered:
                    # END and similar records lack these columns; copy
                    # them verbatim instead of corrupting them.
                    target.write(line)
                    continue
                # Sequentially number atoms
                serial += 1
                # Sequentially number residues (columns 23-27: residue
                # number plus insertion code)
                residue = line[22:27]
                if residue != previous:
                    resnum += 1
                    previous = residue
                # 5-wide serial field keeps columns aligned past 9999
                target.write(line[:6] + '{:5d}'.format(serial)
                             + line[11:21] + 'A'
                             + '{:4d}'.format(resnum) + line[26:])
        os.replace(temp, path)

    # Collect structures
    os.system('rsync -rlpt -v -z --delete --port=33444 rsync.wwpdb.org::ftp/data/structures/divided/pdb/ ./DATABASE')
    current = os.getcwd()
    mirror = os.path.join(current, 'DATABASE')
    database = os.path.join(current, 'PDBDatabase')
    # Deliberately not exist_ok: files left from an earlier run would be
    # treated as failed extractions below and deleted.
    os.mkdir(database)
    for subdirectory in os.listdir(mirror):
        for afile in os.listdir(os.path.join(mirror, subdirectory)):
            location = os.path.join(mirror, subdirectory, afile)
            print(location)
            os.rename(location, os.path.join(database, afile))
    shutil.rmtree(mirror, ignore_errors=True)

    # Clean Database
    io = Bio.PDB.PDBIO()
    parser = Bio.PDB.PDBParser(QUIET=True)
    for thefile in os.listdir(database):
        TheFile = os.path.join(database, thefile)
        try:
            # maxsplit=1 so an ID that itself contains 'pdb' is kept
            # whole; a name without 'pdb' still raises IndexError.
            TheName = thefile.split('.')[0].split('pdb', 1)[1].upper()
            # Extract file
            with gzip.open(TheFile, 'rt') as InFile:
                structure = parser.get_structure(TheName, InFile)
            # Separate chains and save to different files
            for chain in structure.get_chains():
                io.set_structure(chain)
                io.save(os.path.join(
                    database,
                    structure.get_id() + '_' + chain.get_id() + '.pdb'))
        except Exception:
            # Best effort: skip entries that cannot be processed, but
            # let KeyboardInterrupt/SystemExit propagate.
            print('[-] Failed to Extracted' + '\t' + thefile.upper())
        else:
            print('[+] Extracted' + '\t' + thefile.upper())
        # The compressed original is dropped in both cases.
        os.remove(TheFile)

    # Remove unwanted structures
    ppb = Bio.PDB.Polypeptide.PPBuilder()
    for thefile in os.listdir(database):
        TheFile = os.path.join(database, thefile)
        structure = parser.get_structure(TheFile.split('.')[0], TheFile)
        if not ppb.build_peptides(structure, aa_only=True):
            # Delete non-protein files
            print('[-] NOT PROTEIN\t' , thefile)
            os.remove(TheFile)
        else:
            # Renumber residues
            renumber(TheFile)
            print('[+] GOOD\t' , thefile)