def makeFoldxRepair(name):
  """Announce and clean the PDB file ``name``.

  Prints a progress message and hands ``name`` to ``cleanATOM``. Nothing is
  returned.
  """
  # NOTE(review): an optional PyRosetta pre-relaxation used to run here and is
  # currently disabled. As written it would have: loaded a pose from `name`,
  # built the full-atom score function and a MoveMap with backbone and chi
  # freedom enabled, repacked with a RotamerTrialsMover (restricted to
  # repacking, current rotamers included), minimized with a MinMover using the
  # 'linmin' minimizer, and dumped the pose back to `name`.
  print("cleaning:", name)
  # presumably writes a cleaned copy of the file next to the input -- confirm
  # against toolbox.cleanATOM
  cleanATOM(name)
def main():
	"""Collect bond lengths over a fixed set of structures, save and plot them.

	Every entry of ``pdb_ID`` is cleaned with ``cleanATOM``, loaded from the
	matching ``.clean.pdb`` file and passed to ``find_CN_lens``, which returns
	the updated running list of lengths. The list is written one value per
	line to ``CA_N_Bond_Lens.txt`` and then handed to ``make_plots``.
	"""
	CN_len=[]
	from toolbox import cleanATOM
	#Note that I could not access the WHATIF set so I used a set of structures from the PDB
	pdb_ID=['EGFR','centuximab','1BKR','5P21','1E6K','1F21','1R9H','2HDA','2O72','2IT6']

	for pdb_name in pdb_ID:
		cleanATOM(pdb_name+'.pdb')
		#create pose from the cleaned PDB
		pose=pose_from_pdb(pdb_name+'.clean.pdb')

		#find_CN_lens receives the running list and returns the updated one
		CN_len=find_CN_lens(CN_len,pose)

	#write the lengths to a file; the context manager guarantees the handle is
	#flushed and closed before plotting starts (it was previously never closed)
	with open('CA_N_Bond_Lens.txt','w') as write_file:
		write_file.writelines(str(length)+'\n' for length in CN_len)

	#generate plots
	make_plots(CN_len)
def main():
	#initializes input as False, must be found to be valid to proceed
	valid=False
	while valid==False:
		#prompts user for input
		#accepts XXXX or XXXX.pdb
		pdb=raw_input('Please input PDB ID: ')
		#gives user the ability to leave the loop
		if pdb=='stop':
			print 'You have chosen to leave the program. Goodbye!'
			sys.exit()
		if len(pdb)==4:
			pdb+='.pdb'

		valid=check_valid(pdb)
		if valid==False:
			print "PDB ID was not valid. Please input only the 4 character PDB code"
			print "If you would like to stop inputting ID's, type in 'stop' when prompted for ID"
			
		
		
	
	from toolbox import cleanATOM, pose_from_rcsb
	
	cleanATOM(pdb)
	
	#create pose
	pose=pose_from_pdb(pdb[0:4]+'.clean'+pdb[4:])

	seq=pose.sequence()
	

	#initialize all variables that will count sec. struct. types for ala and non_ala resi

	non_ala_h=0
	non_ala_s=0
	non_ala_l=0
	ala_h=0
	ala_s=0
	ala_l=0

	for i in range(len(seq)-1):
		#find sec_struct of particular residue
		sec_struct=determine_sec_struct(seq[i],i+1,seq[i+1],i+2,pose)
		#classify residue structure, update appropriate variable
		if sec_struct==0:
			non_ala_h+=1
		elif sec_struct==1:
			non_ala_s+=1
		elif sec_struct==2:
			ala_h+=1
		elif sec_struct==3:
			ala_s+=1
		elif sec_struct==4:
			ala_l+=1
		elif sec_struct==5:
			non_ala_l+=1
	
	#divide ala totals by totals to get propensity
	p_ala_h=ala_h/float(non_ala_h+ala_h)
	p_ala_s=ala_s/float(non_ala_s+ala_s)
	p_ala_l=ala_l/float(non_ala_l+ala_l)

	print "Helix Propensity=",p_ala_h*100
	print "Sheet Propensity=",p_ala_s*100
	print "Loop Propensity=",p_ala_l*100
Esempio n. 4
0
def main():
    """Run a random mutation walk with probabilistic acceptance on one PDB.

    The PDB base name (without extension) is read from ``sys.argv[1]``. The
    structure is cleaned, loaded, repacked and minimized; then single-residue
    mutations are proposed at random positions until ``max_accept_mut``
    proposals have been accepted. Each accepted mutant becomes the reference
    for the next proposal. Accepted variants past the burn-in are recorded
    and dumped as ``<accept index>.pdb``; all rows are written to a CSV file.
    """
    #takes name of pdb file without the extension
    args = sys.argv
    pdb_file = args[1]
    #set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Amino acids that may be proposed; notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    #Number of mutations to accept
    max_accept_mut = 1500
    #Population size (passed through to calc_prob_mh)
    N = 100
    #Beta (temp term, passed through to calc_prob_mh)
    beta = 1

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    #change these for more or less flexibility
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    #Score before any packing/minimization; only used for the threshold below.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    #Score after packing and minimization; this is the running reference score.
    post_pre_packing_score = sf(initial_pose)

    #Set threshold for selection: half of the score measured BEFORE packing
    threshold = pre_pre_packing_score / 2

    data.append('WT,' + str(post_pre_packing_score) + ',0.0 ,0.0,0\n')

    #number of residues to select from
    n_res = initial_pose.total_residue()

    #start sim: i counts accepted mutations, gen counts every proposal
    i = 0
    gen = 0
    while i < max_accept_mut:
        #update the number of generations that have passed
        gen += 1

        print 'accepts:', i

        #pick a place to mutate (randint is inclusive on both ends)
        mut_location = random.randint(1, n_res)

        #get the amino acid at that position
        res = initial_pose.residue(mut_location)

        #don't mess with C, just choose again
        while (res.name1() == 'C'):
            mut_location = random.randint(1, n_res)
            #get the amino acid at that position
            res = initial_pose.residue(mut_location)

        #choose the amino acid to mutate to
        new_mut_key = random.randint(0, len(AAs) - 1)

        proposed_res = AAs[new_mut_key]

        #don't bother mutating to the same amino acid it just takes more time
        while (proposed_res == res.name1()):
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

        #make the mutation
        #NOTE (original author): this is a weak model and probably should not
        #be used; a newer version repacks everything, re-minimizes, and should
        #also backrub.
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        #score mutant
        variant_score = sf(mutant_pose)

        #get the probability that the mutation will be accepted
        #(calc_prob_mh is defined outside this block)
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        #test to see if mutation is accepted
        if random.random() < probability:

            #create a name for the mutant if it is going to be kept:
            #<old residue><PDB residue number><new residue>
            variant_name = res.name1() + str(initial_pose.pdb_info().number(
                mut_location)) + str(proposed_res)

            # Assuming 1000 burn in phase, take this if out if you want to store everything
            if i > 1000:
                #save name and energy change relative to the current reference
                data.append(variant_name + "," + str(variant_score) + "," +
                            str(variant_score - post_pre_packing_score) + "," +
                            str(probability) + "," + str(gen) + "\n")

                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            #update the wildtype: the accepted mutant becomes the new reference
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            #update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()
    # Output results.
    # NOTE(review): this drops the last five characters of pdb_file even though
    # pdb_file is documented above as having no extension -- confirm intent.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print 'Data written to:', data_filename
    print 'program takes %f' % (t1 - t0)
Esempio n. 5
0
#!/usr/bin/env python
'''This python file is for Homework 2, Problem 4. I didn't use the template. I created a torsion angle file in my code but did not use the torsion angles to calculate the L/H/E propensities.
    --by Xiaotong Zuo, Feb. 2016
    '''

# import
from rosetta import *
init()
from toolbox import get_secstruct
from toolbox import cleanATOM
import sys

# use 1m40.pdb as template, first, cleanATOM
cleanATOM("1m40.pdb")

# load pose
pose=pose_from_pdb("1m40.clean.pdb")


### I did not use torsion angles to calculate the propensities!
# create torsion.dat: phi and psi
f=open("/Users/XT/Downloads/1yy8.pdb","r")
g=open("torsion.dat","w+")
for line in f.readlines():
    a=line.split()
    if "ATOM"==a[0] and "CA"==a[2]:
        g.write(str(a[3])+"\t"+str(a[5])+"\t"+str(pose.phi(int(a[5])))+"\t"+str(pose.psi(int(a[5])))+"\t"+str(pose.psi(int(a[5]))+pose.phi(int(a[5])))+'\n')
        print a
f.close()
g.close()
##  http://graylab.jhu.edu/pyrosetta/downloads/documentation/Workshop6_PyRosetta_Packing_Design.pdf
import os

from toolbox import generate_resfile_from_pdb # generate mutations using resfiles
from toolbox import mutate_residue # generate mutations using mutate_residue  

## changing directory to where PDB's are located (aka where PDB files are located )
# os.chdir does not expand "~", so the home directory is resolved explicitly,
# and the path is assembled with os.path.join instead of backslash literals
# (which Python may interpret as escape sequences).
os.chdir(os.path.join(os.path.expanduser("~"), "Dropbox", "Waterloo-iGEM-2015")) #alter to your specific dropbox path
# NOTE(review): treated as a sub-folder of the Dropbox project directory --
# confirm this is where the WT cas9 structure lives.
os.chdir(os.path.join("Math Modelling", "cas9_modification"))

## initializing rosetta:
rosetta.init()

# import cleaning module for PDB to be usable
from toolbox import cleanATOM

# The previous literal "\4UN3.pdb" started with the octal escape \4 (a control
# character), so it never named the intended file.
cleanATOM("4UN3.pdb") # cleaned PDB file to use for analysis
# NOTE(review): this loads the raw file, not 4UN3.clean.pdb, and the pose is
# replaced inside the loop below -- confirm which file is wanted.
var_pose = pose_from_pdb("4UN3.pdb")

#inputted residue number of interest; converted once so a non-numeric answer
#fails here instead of inside mutate_residue
Num = int(raw_input("enter residue number:\n"))

# list of Amino Acids to substitute
AA_lst = "ACDEFGHIKLMNPQRSTVWY"
for AA_var in AA_lst:
	var_pose = pose_from_pdb("4UN3." + AA_var + ".clean.pdb")
	# NOTE(review): the return value is discarded; if this toolbox version
	# returns a mutated copy rather than editing in place, capture it.
	mutate_residue(var_pose, Num , AA_var) # where Num = residue number to substitute AA

	# for sanity checking purposes: prints out changed 4UN3 pdb pose protein profile 
	#  (sequence, # of res, what is located at Num residue - if the substitution occured)
Esempio n. 7
0
from rosetta import *
init()
import glob
from toolbox import cleanATOM


# step 1, locate the right directory, and cleanATOM
# NOTE(review): this cleans the *.pdb files of the current working directory,
# while step 2 reads from `path` -- run the script from that directory or
# confirm the two locations are meant to differ.
filenames = glob.glob('*.pdb')
for filename in filenames:
    cleanATOM(filename)

# step 2, calculate the bond length and write into a .txt file
path = '/Users/XT/Downloads/top8000_chains_70/random_10_pdb/'
filenames = glob.glob(path+'*.clean.pdb')
# the context manager closes the output even if a structure fails to load
with open("NClength.txt","w+") as f:
    for filename in filenames:
        pose=pose_from_pdb(filename)
        for resi_num in range(1,pose.total_residue()+1):
            N_xyz = pose.residue(resi_num).xyz("N")
            CA_xyz = pose.residue(resi_num).xyz("CA")
            N_CA_vector = CA_xyz-N_xyz
            # norm is a method: without the call the bound-method repr was
            # written to the file instead of the N-CA distance
            f.write(str(N_CA_vector.norm())+"\n")
Esempio n. 8
0
def main(argv):

    #open list of pdbs
    with open(argv[0]) as afile:
        pdbs = afile.readlines()

    #open chain id dictionary
    with open(argv[1]) as afile:
        d = {
            item[0]: [
                item[1].split(","), item[2].split(","), item[3].split(","),
                item[4], item[5]
            ]
            for item in (line.split() for line in afile)
        }

    #loop through pdbs
    for p in pdbs:
        #get the pdb name of the pdb in question
        basename_p = os.path.basename(p).rstrip()

        #create lists of chain ids from d
        all_chains = d[basename_p][0]
        prot_chains = d[basename_p][1]
        pept_chains = d[basename_p][2]

        #clean pdb
        cleanATOM(p.rstrip())

        #new filename consisting of the path name + .clean.pdb
        filename = p.rsplit('.', 1)[0] + ".clean.pdb"

        #input the pose, split it by chain, and create a new pose with the correct PDBInfo
        pose = pose_from_pdb(filename.rstrip())
        pose.update_pose_chains_from_pdb_chains()

        chains = pose.split_by_chain()
        print len(chains)
        newpose = Pose()
        newpose.pdb_info(PDBInfo(newpose))

        #iterate through all chains in protein by iterating through the list of all_chains
        #if chain is found in list of protein chains append it to newpose by appending after seqpos
        pdb_counter = 0
        for idx, chain in enumerate(all_chains):
            if chain in prot_chains:
                newpose.append_residue_by_jump(chains[idx + 1].residue(1),
                                               newpose.total_residue(), "", "",
                                               0)
                pdb_counter += 1
                newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(), 'A')
                for i in range(2, chains[idx + 1].total_residue() + 1):
                    newpose.append_polymer_residue_after_seqpos(
                        chains[idx + 1].residue(i), newpose.total_residue(), 0)
                    pdb_counter += 1
                    newpose.pdb_info().number(newpose.total_residue(),
                                              pdb_counter)
                    newpose.pdb_info().chain(newpose.total_residue(), 'A')

        #then check list of peptide chains and append to newpose after jump
        idx_pept = all_chains.index(pept_chains[0]) + 1

        #determine which res to start and end from using the dict
        pdb_num_res_start = int(d[basename_p][3])
        pdb_num_res_end = int(d[basename_p][4])

        num_res_start = chains[idx_pept].pdb_info().pdb2pose(
            pept_chains[0], pdb_num_res_start)
        num_res_end = chains[idx_pept].pdb_info().pdb2pose(
            pept_chains[0], pdb_num_res_end)

        print num_res_start
        print num_res_end
        #append first residue of the trimmed peptide
        newpose.append_residue_by_jump(chains[idx_pept].residue(num_res_start),
                                       newpose.total_residue(), "", "", 1)
        newpose.pdb_info().number(newpose.total_residue(), 1)
        newpose.pdb_info().chain(newpose.total_residue(), 'B')

        #append remaining residues of the trimmed peptide
        for ind, i in zip(range(2, num_res_end + (2 - num_res_start)),
                          range(num_res_start + 1, num_res_end + 1)):
            newpose.append_polymer_residue_after_seqpos(
                chains[idx_pept].residue(i), newpose.total_residue(), 0)
            newpose.pdb_info().number(newpose.total_residue(), ind)
            newpose.pdb_info().chain(newpose.total_residue(), 'B')

        newpose.pdb_info().obsolete(0)

        #output Trimmed pdb
        tokens = p.rsplit('.', 1)
        file = tokens[0]
        print '%sTrimmedPep.pdb' % (file)
        newpose.dump_pdb('%sTrimmedPep.pdb' % (file.rstrip()))
Esempio n. 9
0
def main(argv):

    #open list of pdbs
    with open(argv[0]) as afile:
        pdbs = afile.readlines()
    
    #open chain id dictionary
    with open(argv[1]) as afile:
        d = { item[0] : [item[1].split(","),item[2].split(","),item[3].split(","),item[4],item[5]] for item in (line.split() for line in afile) }

    #loop through pdbs
    for p in pdbs:
        #get the pdb name of the pdb in question
	basename_p = os.path.basename(p).rstrip()

	#create lists of chain ids from d
	all_chains = d[basename_p][0]
	prot_chains = d[basename_p][1]
	pept_chains = d[basename_p][2]

	#clean pdb
        cleanATOM(p.rstrip())
	
	#new filename consisting of the path name + .clean.pdb
	filename = p.rsplit('.',1)[0] + ".clean.pdb"
	
	#input the pose, split it by chain, and create a new pose with the correct PDBInfo
        pose = pose_from_pdb(filename.rstrip())
        pose.update_pose_chains_from_pdb_chains()
	
        chains=pose.split_by_chain()
	print len(chains)
	newpose = Pose()
	newpose.pdb_info( PDBInfo( newpose ) )

	#iterate through all chains in protein by iterating through the list of all_chains
	#if chain is found in list of protein chains append it to newpose by appending after seqpos
	pdb_counter = 0
	for idx, chain in enumerate(all_chains): 
	    if chain in prot_chains: 
		newpose.append_residue_by_jump(chains[idx+1].residue(1),newpose.total_residue(),"","",0)
        	pdb_counter+=1
		newpose.pdb_info().number(newpose.total_residue(),pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(),'A')
		for i in range(2,chains[idx+1].total_residue()+1):
                    newpose.append_polymer_residue_after_seqpos(chains[idx+1].residue(i), newpose.total_residue(),0)
		    pdb_counter+=1
                    newpose.pdb_info().number(newpose.total_residue(),pdb_counter)
	            newpose.pdb_info().chain(newpose.total_residue(),'A')
	
	#then check list of peptide chains and append to newpose after jump
	idx_pept = all_chains.index( pept_chains[0])+1
	
	#determine which res to start and end from using the dict
	pdb_num_res_start = int(d[basename_p][3])
	pdb_num_res_end = int(d[basename_p][4])
	
	num_res_start = chains[ idx_pept ].pdb_info().pdb2pose( pept_chains[0], pdb_num_res_start )
	num_res_end = chains[ idx_pept ].pdb_info().pdb2pose( pept_chains[0], pdb_num_res_end )
	
	print num_res_start
	print num_res_end
	#append first residue of the trimmed peptide	
	newpose.append_residue_by_jump(chains[idx_pept].residue(num_res_start),newpose.total_residue(), "","",1)
        newpose.pdb_info().number(newpose.total_residue(),1)
        newpose.pdb_info().chain(newpose.total_residue(),'B')	

	#append remaining residues of the trimmed peptide
	for ind,i in zip(range(2,num_res_end+(2-num_res_start)),range(num_res_start+1,num_res_end+1)):
            newpose.append_polymer_residue_after_seqpos(chains[idx_pept].residue(i), newpose.total_residue(),0)	
	    newpose.pdb_info().number(newpose.total_residue(),ind)
	    newpose.pdb_info().chain(newpose.total_residue(),'B')
	
	newpose.pdb_info().obsolete(0)
	
	#output Trimmed pdb
	tokens=p.rsplit('.',1)
	file=tokens[0]
	print '%sTrimmedPep.pdb' % (file)
        newpose.dump_pdb('%sTrimmedPep.pdb' % (file.rstrip()))
def main():
    """Clean every PDB matching a prefix, keep one chain, write its sequence.

    The single positional command-line argument is concatenated with
    ``*.pdb`` to form the glob pattern. For each match (paths containing
    ``clean`` are skipped) the file is cleaned, parsed, its residues are
    renumbered from 1 within every chain, all chains except the one with the
    most residues are detached, the structure is saved back over the cleaned
    file, and the one-letter sequence is written as a ``WT`` FASTA record.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('pdb_directory', action="store", type=str)
    inputs = arg_parser.parse_args()

    # the argument is used as a raw prefix, so it needs its own trailing separator
    for pdb_file in glob.glob(inputs.pdb_directory + '*.pdb'):
        clean_pdb_file = pdb_file.replace('.pdb', '.clean.pdb')
        print('#######################')
        print('#######################{}'.format(pdb_file))
        if 'clean' in pdb_file:
            print('Will overwrite an existing clean pdb so am skipping')
            continue

        fasta_outfile_loc = pdb_file.replace('/PDBs/', '/wt_fastas/')
        fasta_outfile_loc = fasta_outfile_loc.replace('.pdb', '.fasta')

        # Load and clean up pdb file
        cleanATOM(pdb_file)

        # the cleaned file is read once here; the lines are not used further
        with open(clean_pdb_file, 'r') as infile:
            old_lines = infile.readlines()

        pdb_io = PDB.PDBIO()
        pdb_parser = PDB.PDBParser()
        structure = pdb_parser.get_structure(" ", clean_pdb_file)

        if len(structure) != 1:
            message = 'THERE APPEARS TO BE MORE THAN ONE MODEL IN THIS STRUCTURE BEHAVIOR OF PRORAM IS UNKNOWN ({}). EXITING'
            print(message.format(clean_pdb_file))
            continue

        # renumber residues consecutively from 1 in every chain; the value
        # stored per chain is (residue count + 1) and only serves as sort key
        chain_counts = {}
        for model in structure:
            for chain in model:
                next_number = 1
                for residue in chain.get_residues():
                    if residue.id[1] != next_number:
                        current_id = residue.id
                        residue.id = (tuple(current_id[:1]) + (next_number,) +
                                      tuple(current_id[2:]))
                    next_number += 1
                chain_counts[chain.id] = next_number

        # ascending by size: the last entry is kept, everything before it goes
        ranked = sorted(chain_counts.items(), key=lambda pair: pair[1])
        chain_to_keep = ranked[-1][0]
        chains_to_delete = [chain_id for chain_id, _ in ranked[:-1]]
        for chain_id in chains_to_delete:
            structure[0].detach_child(chain_id)
        pdb_io.set_structure(structure)
        pdb_io.save(clean_pdb_file)

        # one-letter sequence of whatever chain remains; non-standard
        # residues are written as 'X'
        for model in structure:
            for chain in model:
                print('kept ID {} and deleted {}'.format(
                    chain.id, chains_to_delete))
                wt_seq = ''.join(
                    three_to_one(residue.get_resname())
                    if is_aa(residue.get_resname(), standard=True) else 'X'
                    for residue in chain)

        with open(fasta_outfile_loc, 'w') as outfile:
            outfile.write('>{}\n{}\n'.format('WT', wt_seq))
	
	return VDW

def calc_Hbond(structure):
	"""Return the summed hydrogen-bond score of ``structure``.

	Adds up the module-level ``scorefxn`` evaluated for the four H-bond score
	types: hbond_sr_bb, hbond_lr_bb, hbond_bb_sc and hbond_sc.
	"""
	#H_bond types contributing to the total
	hbond_terms=[hbond_sr_bb,hbond_lr_bb,hbond_bb_sc,hbond_sc]
	return sum(scorefxn.score_by_scoretype(structure,term) for term in hbond_terms)

#full-atom score function shared by the functions in this script
scorefxn=get_fa_scorefxn()

#clean the three input files first; the loads below read the matching
#*.clean.pdb files (presumably produced by cleanATOM -- confirm)
for _raw_pdb in ('1YY9.pdb','EGFR.pdb','centuximab.pdb'):
	cleanATOM(_raw_pdb)
del _raw_pdb

#Gets complexes, makes them into poses (complex, receptor, antibody)
complex,EGFR,cent=map(pose_from_pdb,('1YY9.clean.pdb','EGFR.clean.pdb','centuximab.clean.pdb'))

structures=[complex,EGFR,cent]
#labels for the energies as they are printed, in the same order as `structures`
struct_names=['1YY9','EGFR','Centuximab']
def main():
	"""Evaluate the full-atom score of every pose in ``structures``."""
	for structure in structures:
		#total pose energy FA score
		Energy=scorefxn(structure)
Esempio n. 12
0
def main():
    #takes name of pdb file without the extention
    args = sys.argv
    pdb_file = args[1]
    #set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    #Number of mutations to accept
    max_accept_mut = 5000
    #Population size
    N = 100
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    pdb_name = str(pdb_file) + "_min.pdb"
    initial_pose.dump_pdb(pdb_name)

    #Set threshold for selection
    #threshold = post_pre_packing_score/2
    #threshold = post_pre_packing_score

    data.append(str(pdb_file) + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    data_filename = pdb_file + '.score'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print 'Data written to:', data_filename
    '''
Esempio n. 13
0
import os

from toolbox import generate_resfile_from_pdb  # generate mutations using resfiles
from toolbox import mutate_residue  # generate mutations using mutate_residue

## changing directory to where PDB's are located (aka where PDB files are located )
# os.chdir does not expand "~", so the home directory is resolved explicitly,
# and the path is assembled with os.path.join instead of backslash literals
# (which Python may interpret as escape sequences).
os.chdir(
    os.path.join(os.path.expanduser("~"), "Dropbox",
                 "Waterloo-iGEM-2015"))  #alter to your specific dropbox path
# NOTE(review): treated as a sub-folder of the Dropbox project directory --
# confirm this is where the WT cas9 structure lives.
os.chdir(os.path.join("Math Modelling", "cas9_modification"))

## initializing rosetta:
rosetta.init()

# import cleaning module for PDB to be usable
from toolbox import cleanATOM

# The previous literal "\4UN3.pdb" started with the octal escape \4 (a control
# character), so it never named the intended file.
cleanATOM("4UN3.pdb")  # cleaned PDB file to use for analysis
# NOTE(review): this loads the raw file, not 4UN3.clean.pdb, and the pose is
# replaced inside the loop below -- confirm which file is wanted.
var_pose = pose_from_pdb("4UN3.pdb")

#inputted residue number of interest; converted once so a non-numeric answer
#fails here instead of inside mutate_residue
Num = int(raw_input("enter residue number:\n"))

# list of Amino Acids to substitute
AA_lst = "ACDEFGHIKLMNPQRSTVWY"
for AA_var in AA_lst:
    var_pose = pose_from_pdb("4UN3." + AA_var + ".clean.pdb")
    # NOTE(review): the return value is discarded; if this toolbox version
    # returns a mutated copy rather than editing in place, capture it.
    mutate_residue(var_pose, Num,
                   AA_var)  # where Num = residue number to substitute AA

    # for sanity checking purposes: prints out changed 4UN3 pdb pose protein profile
def makeFoldxRepair(name):
  """Announce and clean the PDB file ``name``.

  Prints a progress message and hands ``name`` to ``cleanATOM``. Nothing is
  returned.
  """

  print("cleaning:", name)
  # presumably writes a cleaned copy of the file next to the input -- confirm
  # against toolbox.cleanATOM
  cleanATOM(name)
Esempio n. 15
0
	cleanpdb = "%s.clean.pdb" % pdb
	alphaName = "%s_alpha.txt" % pdb
	betaName = "%s_beta.txt" % pdb
	testName = "test_%s.txt" % pdb
		
	alphalst = []
	with open(alphaName, 'r') as fa:
		lst = fa.read()
		alphalst.extend(lst.split(','))

	betalst = []
	with open(betaName, 'r') as fb:
		lst = fb.read()
		betalst.extend(lst.split(','))	

	cleanATOM(pdbName)
	pose = pose_from_pdb(cleanpdb)

	seq = pose.sequence()
	print (pose.total_residue(), len(seq))
    with open(testName, 'w') as ft:
            lines = []      
            for i in range(1,pose.total_residue() + 1):
                    if str(i) in alphalst:
                            lines.append('{0},{1}\n'.format(aminoAcidList.index(str(seq[i-1])), 1))
                    elif str(i) in betalst:
                            lines.append('{0},{1}\n'.format(aminoAcidList.index(str(seq[i-1])), 2))
                    else:
                            lines.append('{0},{1}\n'.format(aminoAcidList.index(str(seq[i-1])), 0))
            ft.writelines(lines)
Esempio n. 16
0
def main(argv):

    #args = sys.argv
    with open(argv[0]) as afile:
        pdbs = afile.readlines()

    with open(argv[1]) as afile:
        d = {
            item[0]:
            [item[1].split(","), item[2].split(","), item[3].split(",")]
            for item in (line.split() for line in afile)
        }

    for p in pdbs:

        basename_p = os.path.basename(p).rstrip()

        all_chains = d[basename_p][0]
        prot_chains = d[basename_p][1]
        pept_chains = d[basename_p][2]

        cleanATOM(p.rstrip())

        filename = p.split('.')[0] + ".clean.pdb"

        pose = pose_from_pdb(filename.rstrip())
        pose.update_pose_chains_from_pdb_chains()

        chains = pose.split_by_chain()

        newpose = Pose()
        newpose.pdb_info(PDBInfo(newpose))

        #iterate through all chains in protein by iterating through the list if chain is found in list of protein chains append it to newpose by appending after seqpos
        pdb_counter = 0
        for idx, chain in enumerate(all_chains):
            if chain in prot_chains:
                newpose.append_residue_by_jump(chains[idx + 1].residue(1),
                                               newpose.total_residue(), "", "",
                                               0)
                pdb_counter += 1
                newpose.pdb_info().number(newpose.total_residue(), pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(), 'A')
                for i in range(2, chains[idx + 1].total_residue() + 1):
                    newpose.append_polymer_residue_after_seqpos(
                        chains[idx + 1].residue(i), newpose.total_residue(), 0)
                    pdb_counter += 1
                    newpose.pdb_info().number(newpose.total_residue(),
                                              pdb_counter)
                    newpose.pdb_info().chain(newpose.total_residue(), 'A')
        #then check list of peptide chains and append to newpose after jump
        idx_pept = all_chains.index(pept_chains[0]) + 1

        num_res_extra = chains[idx_pept].total_residue() - 8
        print num_res_extra
        num_res_offset = num_res_extra / 2 + 1
        print num_res_offset

        print newpose.pdb_info()

        newpose.append_residue_by_jump(
            chains[idx_pept].residue(num_res_offset), newpose.total_residue(),
            "", "", 1)
        newpose.pdb_info().number(newpose.total_residue(), 1)
        newpose.pdb_info().chain(newpose.total_residue(), 'B')

        for ind, i in zip(range(2, 9),
                          range(num_res_offset + 1, num_res_offset + 8)):
            newpose.append_polymer_residue_after_seqpos(
                chains[idx_pept].residue(i), newpose.total_residue(), 0)
            newpose.pdb_info().number(newpose.total_residue(), ind)
            newpose.pdb_info().chain(newpose.total_residue(), 'B')

        print chains[idx_pept]

        print newpose

        newpose.pdb_info().obsolete(0)

        print newpose.pdb_info()
        tokens = p.split('.')
        file = tokens[0]
        print '%sTrimmedPep.pdb' % (file)
        newpose.dump_pdb('%sTrimmedPep.pdb' % (file.rstrip()))
Esempio n. 17
0
def main(argv):

    #args = sys.argv
    with open(argv[0]) as afile:
        pdbs = afile.readlines()
    
    with open(argv[1]) as afile:
        d = { item[0] : [item[1].split(","),item[2].split(","),item[3].split(",")] for item in (line.split() for line in afile) }

    for p in pdbs:

	basename_p = os.path.basename(p).rstrip()

	all_chains = d[basename_p][0]
	prot_chains = d[basename_p][1]
	pept_chains = d[basename_p][2]

        cleanATOM(p.rstrip())
	
	filename = p.split('.')[0] + ".clean.pdb"
	
        pose = pose_from_pdb(filename.rstrip())
        pose.update_pose_chains_from_pdb_chains()
	
        chains=pose.split_by_chain()
        
	newpose = Pose()
	newpose.pdb_info( PDBInfo( newpose ) )

	#iterate through all chains in protein by iterating through the list if chain is found in list of protein chains append it to newpose by appending after seqpos
	pdb_counter = 0
	for idx, chain in enumerate(all_chains): 
	    if chain in prot_chains: 
		newpose.append_residue_by_jump(chains[idx+1].residue(1),newpose.total_residue(),"","",0)
        	pdb_counter+=1
		newpose.pdb_info().number(newpose.total_residue(),pdb_counter)
                newpose.pdb_info().chain(newpose.total_residue(),'A')
		for i in range(2,chains[idx+1].total_residue()+1):
                    newpose.append_polymer_residue_after_seqpos(chains[idx+1].residue(i), newpose.total_residue(),0)
		    pdb_counter+=1
                    newpose.pdb_info().number(newpose.total_residue(),pdb_counter)
	            newpose.pdb_info().chain(newpose.total_residue(),'A')
	#then check list of peptide chains and append to newpose after jump
	idx_pept = all_chains.index( pept_chains[0])+1

	num_res_extra = chains[idx_pept].total_residue() - 8
	print num_res_extra
	num_res_offset = num_res_extra/2 + 1
	print num_res_offset	

	print newpose.pdb_info()	

	newpose.append_residue_by_jump(chains[idx_pept].residue(num_res_offset),newpose.total_residue(), "","",1)
        newpose.pdb_info().number(newpose.total_residue(),1)
        newpose.pdb_info().chain(newpose.total_residue(),'B')	

	for ind,i in zip(range(2,9),range(num_res_offset+1,num_res_offset+8)):
            newpose.append_polymer_residue_after_seqpos(chains[idx_pept].residue(i), newpose.total_residue(),0)	
	    newpose.pdb_info().number(newpose.total_residue(),ind)
	    newpose.pdb_info().chain(newpose.total_residue(),'B')
	
	print chains[idx_pept]
	
	print newpose	
	
	newpose.pdb_info().obsolete(0)
	
	print newpose.pdb_info()
	tokens=p.split('.')
	file=tokens[0]
	print '%sTrimmedPep.pdb' % (file)
        newpose.dump_pdb('%sTrimmedPep.pdb' % (file.rstrip()))