Esempio n. 1
0
# Accumulators for designed-protein length statistics. Nothing in this section
# updates them; presumably they are filled in further down -- TODO confirm.
mean_designed_protein_length = 0
designed_protein_lengths = []
for method in methods:
    for protein in PDBS:
        # Spaces-only indentation: this line originally mixed a tab with
        # spaces, which Python 3 rejects with TabError.
        designed_length = [0]
        # The first four characters are used as the (upper-cased) PDB id and
        # the fifth character as the chain id.
        pdb_id = protein[0:4].upper()
        chain_id = protein[4]
        # NOTE: `file` shadows the Python 2 builtin of the same name; the name
        # is kept because code outside this section may read it.
        file = pdb_id + "_" + chain_id + "_Aligned_Sequences.fasta"

        fileparts = re.split("_", file)
        # A parenthesised single-argument print emits exactly the same text
        # under Python 2 and also parses under Python 3.
        print("Processsing file: " + file)
        print("PDB: " + pdb_id)
        print("CHAIN: " + chain_id)
        # Per the original author's note this PDB file is the one parsed by
        # DSSP (the DSSP call itself is not visible in this section).
        searchPDB = pdb_id + "_" + chain_id + ".pdb"
        pdbLocation = duncan_structure_path + searchPDB  # Full path of the PDB file
        # Presumably returns a list of aligned natural sequences -- verify
        # against the `af` helper module.
        natural_sequences = af.get_natural_sequences_duncan(file)
        ancestor = natural_sequences[0]  # First entry is treated as the "ancestral sequence"
        ancestor_length = len(ancestor)
        # Debug aid: print("Ancestor Length: " + str(ancestor_length))
        counter = 0
        gaps = 0
        gap_locations = []

        # Count the alignment gaps ('-') in the ancestral sequence and record
        # their positions, so they can be taken out before amino acids are
        # counted.
        while counter < ancestor_length:
            acid = ancestor[counter]
            if acid == '-':
                gaps = gaps + 1
                gap_locations.append(counter)  # Index of every gapped column
            counter = counter + 1
Esempio n. 2
0
# Directory prefixes (machine-specific absolute paths) for the designed
# sequences, the aligned natural sequences, and the structure files.
duncan_designed_sequence_path = "/Users/qian/Desktop/max_score_evol_sim/sequences/designed_sequences/"
duncan_natural_sequence_path = "/Users/qian/Desktop/max_score_evol_sim/sequences/aligned_sequences/"
duncan_structure_path = "/Users/qian/Desktop/max_score_evol_sim/structures/"

for protein in PDBS:
    # The first four characters are used as the (upper-cased) PDB id and the
    # fifth character as the chain id.
    pdb_id = protein[0:4].upper()
    chain_id = protein[4]
    # NOTE: `file` shadows the Python 2 builtin of the same name; the name is
    # kept because code outside this section may read it.
    file = pdb_id + "_" + chain_id + "_Aligned_Sequences.fasta"
    fileparts = re.split("_", file)

    # A parenthesised single-argument print emits exactly the same text under
    # Python 2 and also parses under Python 3.
    print("Processsing file: " + file)
    print("PDB: " + pdb_id)
    print("CHAIN: " + chain_id)
    # Per the original author's note this PDB file is the one parsed by DSSP
    # (the DSSP call itself is not visible in this section).
    searchPDB = pdb_id + "_" + chain_id + ".pdb"
    pdbLocation = duncan_structure_path + searchPDB  # Full path of the PDB file
    # Presumably returns a list of aligned natural sequences -- verify against
    # the `af` helper module.
    natural_sequences = af.get_natural_sequences_duncan(file)
    ancestor = natural_sequences[0]  # First entry is treated as the ancestral sequence
    ancestor_length = len(ancestor)
    # Debug aid: print("Ancestor Length: " + str(ancestor_length))
    counter = 0
    gaps = 0
    gap_locations = []

    # Count the alignment gaps ('-') in the ancestral sequence and record their
    # positions, so they can be taken out before amino acids are counted.
    while counter < ancestor_length:
        acid = ancestor[counter]
        if acid == '-':
            gaps = gaps + 1
            gap_locations.append(counter)  # Index of every gapped column
        # Advance to the next column. This increment was missing, so the loop
        # never terminated for a non-empty ancestral sequence.
        counter = counter + 1
# Directory prefixes (machine-specific absolute paths) for the designed
# sequences, the aligned natural sequences, and the structure files.
duncan_designed_sequence_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/sequences/duncan_sequences/designed_sequences/"
duncan_natural_sequence_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/sequences/duncan_sequences/aligned_sequences/"
duncan_structure_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/structures/duncan_structures/"

for protein in PDBS:
    # Characters 0-3 form the PDB id (upper-cased); character 4 is the chain.
    pdb_id = protein[:4].upper()
    chain_id = protein[4]
    # NOTE: `file` shadows the Python 2 builtin; the name is kept because code
    # outside this section may read it.
    file = "_".join([pdb_id, chain_id]) + "_Aligned_Sequences.fasta"
    fileparts = file.split("_")

    # Progress output (same text as a Python 2 print statement would emit).
    print("Processsing file: " + file)
    print("PDB: " + pdb_id)
    print("CHAIN: " + chain_id)

    # Per the original author's note, DSSP parses this PDB file (the DSSP call
    # is not visible in this section).
    searchPDB = "_".join([pdb_id, chain_id]) + ".pdb"
    pdbLocation = duncan_structure_path + searchPDB

    # Presumably a list of aligned natural sequences -- verify against the
    # `af` helper module. The first entry is used as the ancestral sequence.
    natural_sequences = af.get_natural_sequences_duncan(file)
    ancestor = natural_sequences[0]
    ancestor_length = len(ancestor)
    # Debug aid: print("Ancestor Length: " + str(ancestor_length))

    # Walk the ancestral sequence once, tallying gap characters ('-') and
    # remembering their column indices so they can be removed before amino
    # acids are counted. `counter` ends equal to the number of columns visited.
    counter = 0
    gaps = 0
    gap_locations = []
    for acid in ancestor:
        if acid == '-':
            gaps += 1
            gap_locations.append(counter)
        counter += 1
# Directory prefixes (machine-specific absolute paths) for the aligned natural
# sequences and the structure files.
duncan_natural_sequence_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/sequences/duncan_sequences/aligned_sequences/"
duncan_structure_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/structures/duncan_structures/"


for protein in PDBS:
    # Characters 0-3 form the PDB id (upper-cased); character 4 is the chain.
    pdb_id = protein[:4].upper()
    chain_id = protein[4]
    # NOTE: `file` shadows the Python 2 builtin; the name is kept because code
    # outside this section may read it.
    file = "_".join([pdb_id, chain_id]) + "_Aligned_Sequences.fasta"
    fileparts = file.split("_")

    # Progress output (same text as a Python 2 print statement would emit).
    print("Processsing file: " + file)
    print("PDB: " + pdb_id)
    print("CHAIN: " + chain_id)

    # Per the original author's note, DSSP parses this PDB file (the DSSP call
    # is not visible in this section).
    searchPDB = "_".join([pdb_id, chain_id]) + ".pdb"
    pdbLocation = duncan_structure_path + searchPDB

    # NOTE(review): the original comment said this call "splits alignments
    # into designed and natural alignment files"; here only its return value
    # is used, as an indexable collection of sequences -- confirm what the
    # `af` helper actually does.
    natural_sequences = af.get_natural_sequences_duncan(file)
    ancestor = natural_sequences[0]  # First entry is used as the ancestral sequence
    ancestor_length = len(ancestor)
    print("Ancestor Length: " + str(ancestor_length))
    print("Length of Alignment: " + str(len(natural_sequences)))

    # Walk the ancestral sequence once, tallying gap characters ('-') and
    # remembering their column indices so they can be removed before amino
    # acids are counted. `counter` ends equal to the number of columns visited.
    counter = 0
    gaps = 0
    gap_locations = []
    for acid in ancestor:
        if acid == '-':
            gaps += 1
            gap_locations.append(counter)
        counter += 1
# Accumulators for designed-protein length statistics. Nothing in this section
# updates them; presumably they are filled in further down -- TODO confirm.
mean_designed_protein_length = 0
designed_protein_lengths = []
for temp in temps:
    for protein in PDBS:
        # Spaces-only indentation: this line originally mixed a tab with
        # spaces, which Python 3 rejects with TabError.
        designed_length = [0]
        # The first four characters are used as the (upper-cased) PDB id and
        # the fifth character as the chain id.
        pdb_id = protein[0:4].upper()
        chain_id = protein[4]
        # NOTE: `file` shadows the Python 2 builtin of the same name; the name
        # is kept because code outside this section may read it.
        file = pdb_id + "_" + chain_id + "_Aligned_Sequences.fasta"

        fileparts = re.split("_", file)
        # A parenthesised single-argument print emits exactly the same text
        # under Python 2 and also parses under Python 3.
        print("Processsing file: " + file)
        print("PDB: " + pdb_id)
        print("CHAIN: " + chain_id)
        # Per the original author's note this PDB file is the one parsed by
        # DSSP (the DSSP call itself is not visible in this section).
        searchPDB = pdb_id + "_" + chain_id + ".pdb"
        pdbLocation = duncan_structure_path + searchPDB  # Full path of the PDB file
        # Presumably returns a list of aligned natural sequences -- verify
        # against the `af` helper module.
        natural_sequences = af.get_natural_sequences_duncan(file)
        ancestor = natural_sequences[0]  # First entry is treated as the "ancestral sequence"
        ancestor_length = len(ancestor)
        # Debug aid: print("Ancestor Length: " + str(ancestor_length))
        counter = 0
        gaps = 0
        gap_locations = []

        # Count the alignment gaps ('-') in the ancestral sequence and record
        # their positions, so they can be taken out before amino acids are
        # counted.
        while counter < ancestor_length:
            acid = ancestor[counter]
            if acid == '-':
                gaps = gaps + 1
                gap_locations.append(counter)  # Index of every gapped column
            counter = counter + 1