예제 #1
0
 def Renumber(self, directory):
     ''' Renumber structures starting at 1

     Every file found in `directory` is rewritten in place so that its
     coordinate records carry sequential atom serial numbers and
     sequential residue numbers (both starting at 1), with the chain
     identifier forced to 'A'.

     Only ATOM, HETATM and full-width TER records are renumbered; any
     other line (for example END, or a bare TER) is copied unchanged
     because it has no serial/residue columns to rewrite.

     directory -- path of the directory holding the PDB files
     '''
     print('\x1b[33m[.] Renumbering structures...\x1b[0m')
     # Work with joined paths instead of os.chdir() so the caller's working
     # directory is never left changed if a file fails part-way through.
     for TheFile in tqdm.tqdm(os.listdir(directory)):
         source = os.path.join(directory, TheFile)
         temporary = source + 'X'
         atom_serial = 0
         residue_number = 0
         previous_residue = None
         with open(source, 'r') as pdb, open(temporary, 'w') as PDB:
             for line in pdb:
                 record = line.rstrip('\r\n')
                 renumberable = record.startswith(('ATOM', 'HETATM', 'TER'))
                 if not renumberable or len(record) < 26:
                     # No atom/residue columns here: keep the line as is
                     PDB.write(line)
                     continue
                 atom_serial += 1
                 # Chain ID + residue number + insertion code identify a
                 # residue; a change in any of them starts a new residue.
                 residue = line[21:27]
                 if residue != previous_residue:
                     residue_number += 1
                     previous_residue = residue
                 # PDB columns: serial is 7-11 (5 wide), chain is 22,
                 # residue number is 23-26 (4 wide).
                 PDB.write(line[:6] + '{:5d}'.format(atom_serial) +
                           line[11:21] + 'A' +
                           '{:4d}'.format(residue_number) + line[26:])
         # Swap the rewritten copy over the original in a single step
         os.replace(temporary, source)
예제 #2
0
def _renumber_pdb_file(path):
	''' Rewrite one PDB file in place with sequential atom and residue numbers in chain A '''
	temporary = path + 'X'
	atom_serial = 0
	residue_number = 0
	previous_residue = None
	with open(path, 'r') as pdb, open(temporary, 'w') as PDB:
		for line in pdb:
			record = line.rstrip('\r\n')
			#Only ATOM, HETATM and full-width TER records carry serial/residue columns, everything else (END, bare TER) is copied unchanged
			if not record.startswith(('ATOM', 'HETATM', 'TER')) or len(record) < 26:
				PDB.write(line)
				continue
			atom_serial += 1								#Sequentially number atoms
			residue = line[21:27]								#Chain ID + residue number + insertion code
			if residue != previous_residue:
				residue_number += 1							#Sequentially number residues
				previous_residue = residue
			#PDB columns: serial is 7-11 (5 wide), chain is 22, residue number is 23-26 (4 wide)
			PDB.write(line[:6] + '{:5d}'.format(atom_serial) + line[11:21] + 'A' + '{:4d}'.format(residue_number) + line[26:])
	#Swap the rewritten copy over the original in a single step
	os.replace(temporary, path)

def Database():
	''' This function downloads the full PDB database and cleans it up.

	Output will be a directory called PDBDatabase (created in the current
	working directory) holding one .pdb file per protein chain, each
	renumbered from 1 and labelled as chain A.

	Steps:
	1. rsync the divided PDB archive into ./DATABASE and flatten it into ./PDBDatabase
	2. decompress every entry and save each chain to <ID>_<chain>.pdb
	3. delete chains that contain no peptide and renumber the rest

	Raises FileExistsError if PDBDatabase already exists: step 2 deletes
	every file it cannot extract, so reusing an old output directory would
	destroy earlier results.
	'''
	import shutil
	#Collect structures
	os.system('rsync -rlpt -v -z --delete --port=33444 rsync.wwpdb.org::ftp/data/structures/divided/pdb/ ./DATABASE')
	current = os.getcwd()
	download_dir = os.path.join(current, 'DATABASE')
	database_dir = os.path.join(current, 'PDBDatabase')
	os.mkdir(database_dir)
	for directories in os.listdir(download_dir):
		subdirectory = os.path.join(download_dir, directories)
		for afile in os.listdir(subdirectory):
			location = os.path.join(subdirectory, afile)
			print(location)
			os.rename(location , os.path.join(database_dir, afile))
	shutil.rmtree(download_dir)
	#Clean Database
	parser = Bio.PDB.PDBParser(QUIET=True)
	io = Bio.PDB.PDBIO()
	for thefile in os.listdir(database_dir):
		TheFile = os.path.join(database_dir, thefile)
		try:
			#Entry ID is the text between 'pdb' and the first '.' of the file name
			TheName = thefile.split('.')[0].split('pdb')[1].upper()
			#Extract file
			with gzip.open(TheFile, 'rt') as InFile:
				structure = parser.get_structure(TheName , InFile)
			#Separate chains and save to different files
			for chain in structure.get_chains():
				io.set_structure(chain)
				io.save(os.path.join(database_dir, structure.get_id() + '_' + chain.get_id() + '.pdb'))
		except Exception:
			#Best effort: one unreadable entry must not stop the whole run, but Ctrl-C still does
			print('[-] Failed to Extracted' + '\t' + thefile.upper())
		else:
			print('[+] Extracted' + '\t' + thefile.upper())
		#The compressed entry is discarded whether or not it could be extracted
		os.remove(TheFile)
	#Remove unwanted structures
	ppb = Bio.PDB.Polypeptide.PPBuilder()
	for thefile in os.listdir(database_dir):
		TheFile = os.path.join(database_dir, thefile)
		structure = parser.get_structure(thefile.split('.')[0] , TheFile)
		#Delete non-protein files
		if not ppb.build_peptides(structure , aa_only=True):
			print('[-] NOT PROTEIN\t' , thefile)
			os.remove(TheFile)
		else:
			#Renumber residues
			_renumber_pdb_file(TheFile)
			print('[+] GOOD\t' , thefile)