def main():
    """Run the BioVerse design pipeline over a fixed list of structures.

    For each code in ``pdb_codes`` the steps below run in this exact order
    (the order is kept from the original implementation; later steps may
    rely on output of earlier ones -- do not reorder without checking):

    1. Create the ``EditPDB`` helper for the structure.
    2. Get surface residues (``SurfaceResidues``), interleaved with the
       matching ``EditPDB`` B-factor edits.
    3. Get the ligand (``LigandBindingSite``) and apply the
       ligand-binding-pocket B-factor edit.
    4. Find pockets (``Rosetta``) and apply the pocket-residue B-factor edit.
    5. Make the 'mutants_list' file for ddg_monomer (``MutantListMaker``).
    6. Call ``DDGMonomer.get_targets(5.5)``.

    Takes no arguments and returns ``None``; progress is reported on stdout.
    """
    # TODO: build a tool that allows selection of a pdb based on header file
    # information, and add psiBLAST capability.  Until then the UniProt ->
    # PDB lookup is not part of this entry point and the codes are listed by
    # hand.
    # NOTE(review): '1W36DBCY' looks like a cleaned-file name rather than a
    # bare PDB id -- confirm against the files the helper classes read.
    pdb_codes = ['1W36DBCY', '2ZHG']

    # Single-argument print calls behave the same under Python 2 and 3.
    rule = "===================================================================="
    print(rule)
    print("Entering BioVerse Design Pipeline")
    print(rule)

    for pdb_code in pdb_codes:
        print("\n\nIdentifying residues of interest for %s" % pdb_code)

        # NOTE: no download happens here; the PDBFromUniprot.fetch_pdb step
        # is not called from this function.
        pdb_editor = EditPDB(pdb_code)

        # Get surface residues.
        surface_residues = SurfaceResidues(pdb_code)
        surface_residues.write_resi_sasa_output()
        surface_residues.write_frac_sasa_output()
        pdb_editor.edit_bfactor_sasa()
        surface_residues.write_surface_resi_output(0.3)
        pdb_editor.edit_bfactor_surface_residues()

        # Get ligand.
        # Open questions: how many pdbs are there for each uniprot id, what
        # is the difference between these sequences, what ligand is bound?
        ligand = LigandBindingSite(pdb_code)
        ligand.get_residues_within_5A()
        ligand.write_residue_output()
        pdb_editor.edit_bfactor_ligand_binding_pocket()

        # Find pockets.
        rosetta = Rosetta(pdb_code)
        rosetta.find_pockets()
        pdb_editor.edit_bfactor_pocket_residues()

        # Make 'mutants_list' file for ddg_monomer.  Surface residues are
        # deliberately left out of the list (no SurfRes=True argument).
        mutant_list_maker = MutantListMaker(pdb_code)
        mutant_list_maker.generate_mutant_list(pocketres=True, lpocket=True)

        ddg_monomer = DDGMonomer(pdb_code)
        ddg_monomer.get_targets(5.5)
Example #2
0
def main():
    """Run the structural pipeline for the single structure '4PDJ'.

    Steps, in order: get ligand, get surface residues, find pockets, edit
    the B-factor column of the pdb, make the 'mutants_list' file for
    ddg_monomer, then call ``DDGMonomer.get_targets(5.5)``.

    The database / UniProt lookup and the .pdb / mmCIF download are not
    performed by this function.  Takes no arguments and returns ``None``.
    """
    structure_id = '4PDJ'

    # Get ligand.
    # Open questions: how many pdbs are there for each uniprot id, what is
    # the difference between these sequences, what ligand is bound?
    binding_site = LigandBindingSite(structure_id)
    binding_site.get_residues_within_5A()
    binding_site.write_residue_output()

    # Get surface residues (write_frac_sasa_output is not called here).
    surface = SurfaceResidues(structure_id)
    surface.write_resi_sasa_output()
    surface.write_surface_resi_output(0.3)

    # Find pockets.
    pocket_finder = Rosetta(structure_id)
    pocket_finder.find_pockets()

    # Edit the B-factor column of a pdb.
    # Works, but needs improvement with file handling.
    bfactor_editor = EditPDB(structure_id)
    bfactor_editor.edit_bfactor_sasa()
    bfactor_editor.edit_bfactor_ligand_binding_pocket()
    bfactor_editor.edit_bfactor_surface_residues()
    bfactor_editor.edit_bfactor_pocket_residues()

    # Make 'mutants_list' file for ddg_monomer.
    mutant_lister = MutantListMaker(structure_id)
    mutant_lister.generate_mutant_list()

    ddg_runner = DDGMonomer(structure_id)
    ddg_runner.get_targets(5.5)
Example #3
0
def main():
    """Write fractional SASA for '4PDJ', write B-factors for 'test', regress.

    Calls, in order:

    * ``SurfaceResidues('4PDJ').write_frac_sasa_output()``
    * ``EditPDB('test').write_bfactor()``
    * ``Correlation().linregress('4PDJ_fracsasa.txt',
      'test_pulled_bfactors.txt')``

    Presumably the first two calls produce the two files handed to the
    regression -- TODO confirm against the helper classes.

    The database lookup, the pdb / mmCIF download and the ligand step are
    not performed by this function.  Takes no arguments, returns ``None``.
    """
    # Get surface residues: only the fractional SASA output is written.
    surface = SurfaceResidues('4PDJ')
    surface.write_frac_sasa_output()

    # Edit the B-factor column of a pdb.
    # Currently this requires a .txt file in the /pdb directory for the
    # editing... needs work.
    bfactor_editor = EditPDB('test')
    bfactor_editor.write_bfactor()

    # Linear regression.
    regression = Correlation()
    regression.linregress('4PDJ_fracsasa.txt', 'test_pulled_bfactors.txt')
Example #4
0
def main():
    """Regress '4PDJ_fracsasa.txt' against 'test_pulled_bfactors.txt'.

    Before the regression, the fractional SASA output is written for
    structure '4PDJ' and ``write_bfactor`` is called on an ``EditPDB``
    built for 'test'.  NOTE(review): it looks like those two calls create
    the regression inputs -- verify in SurfaceResidues / EditPDB.

    Not done here: database / UniProt lookup, pdb or mmCIF download, and
    the ligand step.  Takes no arguments and returns ``None``.
    """
    sasa_input = '4PDJ_fracsasa.txt'
    bfactor_input = 'test_pulled_bfactors.txt'

    # Get surface residues (fractional SASA output only).
    sasa_writer = SurfaceResidues('4PDJ')
    sasa_writer.write_frac_sasa_output()

    # Edit the B-factor column of a pdb.  Currently this requires a .txt
    # file in the /pdb directory for the editing... needs work.
    editor = EditPDB('test')
    editor.write_bfactor()

    # Linear regression.
    fit = Correlation()
    fit.linregress(sasa_input, bfactor_input)