# Accumulators for designed-protein lengths; both start empty here and are
# presumably filled by code further down the script.
mean_designed_protein_length = 0
designed_protein_lengths = []

for method in methods:
    for protein in PDBS:
        designed_length = [0]

        # Characters 0-3 of each PDBS entry are used as the PDB id and
        # character 4 as the chain id.
        pdb_id = protein[:4].upper()
        chain_id = protein[4]
        chain_label = pdb_id + "_" + chain_id

        # NOTE: `file` shadows the Python 2 builtin of the same name; it is
        # kept because later parts of the script may rely on it.
        file = chain_label + "_Aligned_Sequences.fasta"
        fileparts = re.split("_", file)
        print("Processsing file: " + file)
        print("PDB: " + pdb_id)
        print("CHAIN: " + chain_id)

        # Structure file for this chain (per the original notes, this is the
        # PDB file handed to DSSP) and its full location on disk.
        searchPDB = chain_label + ".pdb"
        pdbLocation = duncan_structure_path + searchPDB

        # The first entry of the natural-sequence list is treated as the
        # "ancestral" sequence.
        natural_sequences = af.get_natural_sequences_duncan(file)
        ancestor = natural_sequences[0]
        ancestor_length = len(ancestor)
        # Debug aid: print("Ancestor Length: " + str(ancestor_length))

        # Locate the alignment gaps ('-') in the ancestral sequence; they must
        # be taken out before the amino acids are counted.
        gaps = 0
        gap_locations = []  # indices of the gapped alignment columns
        for counter, acid in enumerate(ancestor):
            if acid == '-':
                gaps += 1
                gap_locations.append(counter)
        # An index-driven scan finishes with counter equal to the sequence
        # length; keep that end state for any code that reads it afterwards.
        counter = ancestor_length
# Hard-coded locations of the designed sequences, the aligned natural
# sequences and the structure files used by this script.
duncan_designed_sequence_path = "/Users/qian/Desktop/max_score_evol_sim/sequences/designed_sequences/"
duncan_natural_sequence_path = "/Users/qian/Desktop/max_score_evol_sim/sequences/aligned_sequences/"
duncan_structure_path = "/Users/qian/Desktop/max_score_evol_sim/structures/"

for protein in PDBS:
    # Characters 0-3 of each PDBS entry are used as the PDB id and
    # character 4 as the chain id.
    pdb_id = protein[0:4].upper()
    chain_id = protein[4]

    # NOTE: `file` shadows the Python 2 builtin of the same name; it is kept
    # because later parts of the script may rely on it.
    file = pdb_id + "_" + chain_id + "_Aligned_Sequences.fasta"
    fileparts = re.split("_", file)
    print("Processsing file: " + file)
    print("PDB: " + pdb_id)
    print("CHAIN: " + chain_id)

    # Structure file for this chain (per the original notes, this is the PDB
    # file that DSSP will be parsing) and its full location on disk.
    searchPDB = pdb_id + "_" + chain_id + ".pdb"
    pdbLocation = duncan_structure_path + searchPDB

    # The first entry of the natural-sequence list is treated as the
    # ancestral sequence.
    natural_sequences = af.get_natural_sequences_duncan(file)
    ancestor = natural_sequences[0]
    ancestor_length = len(ancestor)
    # Debug aid: print("Ancestor Length: " + str(ancestor_length))

    # Locate the alignment gaps ('-') in the ancestral sequence; they must be
    # taken out before the amino acids are counted.
    #
    # The previous `while (counter < ancestor_length)` loop never advanced
    # `counter`, so it could not terminate for a non-empty ancestor. Iterating
    # with enumerate() visits each position exactly once.
    gaps = 0
    gap_locations = []  # indices of the gapped alignment columns
    for counter, acid in enumerate(ancestor):
        if acid == '-':
            gaps = gaps + 1
            gap_locations.append(counter)
    # A completed index scan leaves counter equal to the sequence length;
    # keep that end state for any code that reads it afterwards.
    counter = ancestor_length
# Hard-coded locations of the designed sequences, the aligned natural
# sequences and the structure files used by this script.
duncan_designed_sequence_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/sequences/duncan_sequences/designed_sequences/"
duncan_natural_sequence_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/sequences/duncan_sequences/aligned_sequences/"
duncan_structure_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/structures/duncan_structures/"

for protein in PDBS:
    # Characters 0-3 of each PDBS entry are used as the PDB id and
    # character 4 as the chain id.
    pdb_id = protein[:4].upper()
    chain_id = protein[4]
    chain_label = pdb_id + "_" + chain_id

    # NOTE: `file` shadows the Python 2 builtin of the same name; it is kept
    # because later parts of the script may rely on it.
    file = chain_label + "_Aligned_Sequences.fasta"
    fileparts = re.split("_", file)
    print("Processsing file: " + file)
    print("PDB: " + pdb_id)
    print("CHAIN: " + chain_id)

    # Structure file for this chain (per the original notes, this is the PDB
    # file that DSSP will be parsing) and its full location on disk.
    searchPDB = chain_label + ".pdb"
    pdbLocation = duncan_structure_path + searchPDB

    # The first entry of the natural-sequence list is treated as the
    # ancestral sequence.
    natural_sequences = af.get_natural_sequences_duncan(file)
    ancestor = natural_sequences[0]
    ancestor_length = len(ancestor)
    # Debug aid: print("Ancestor Length: " + str(ancestor_length))

    # Locate the alignment gaps ('-') in the ancestral sequence; they must be
    # taken out before the amino acids are counted.
    gaps = 0
    gap_locations = []  # indices of the gapped alignment columns
    for counter, acid in enumerate(ancestor):
        if acid == '-':
            gaps += 1
            gap_locations.append(counter)
    # An index-driven scan finishes with counter equal to the sequence
    # length; keep that end state for any code that reads it afterwards.
    counter = ancestor_length
# Hard-coded locations of the aligned natural sequences and the structure
# files used by this script.
duncan_natural_sequence_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/sequences/duncan_sequences/aligned_sequences/"
duncan_structure_path = "/Users/Eleisha/Documents/Wilke_Lab/Project_1/project/structures/duncan_structures/"

for protein in PDBS:
    # Characters 0-3 of each PDBS entry are used as the PDB id and
    # character 4 as the chain id.
    pdb_id = protein[:4].upper()
    chain_id = protein[4]
    chain_label = pdb_id + "_" + chain_id

    # NOTE: `file` shadows the Python 2 builtin of the same name; it is kept
    # because later parts of the script may rely on it.
    file = chain_label + "_Aligned_Sequences.fasta"
    fileparts = re.split("_", file)
    print("Processsing file: " + file)
    print("PDB: " + pdb_id)
    print("CHAIN: " + chain_id)

    # Structure file for this chain (per the original notes, this is the PDB
    # file that DSSP will be parsing) and its full location on disk.
    searchPDB = chain_label + ".pdb"
    pdbLocation = duncan_structure_path + searchPDB

    # NOTE(review): the helper lives outside this fragment; from its use here
    # it returns an indexable collection whose first entry is taken as the
    # ancestral sequence. Confirm whether it also writes any files.
    natural_sequences = af.get_natural_sequences_duncan(file)
    ancestor = natural_sequences[0]
    ancestor_length = len(ancestor)
    print("Ancestor Length: " + str(ancestor_length))
    print("Length of Alignment: " + str(len(natural_sequences)))

    # Locate the alignment gaps ('-') in the ancestral sequence; they must be
    # taken out before the amino acids are counted.
    gaps = 0
    gap_locations = []  # indices of the gapped alignment columns
    for counter, acid in enumerate(ancestor):
        if acid == '-':
            gaps += 1
            gap_locations.append(counter)
    # An index-driven scan finishes with counter equal to the sequence
    # length; keep that end state for any code that reads it afterwards.
    counter = ancestor_length
# Accumulators for designed-protein lengths; both start empty here and are
# presumably filled by code further down the script.
mean_designed_protein_length = 0
designed_protein_lengths = []

for temp in temps:
    for protein in PDBS:
        designed_length = [0]

        # Characters 0-3 of each PDBS entry are used as the PDB id and
        # character 4 as the chain id.
        pdb_id = protein[:4].upper()
        chain_id = protein[4]
        chain_label = pdb_id + "_" + chain_id

        # NOTE: `file` shadows the Python 2 builtin of the same name; it is
        # kept because later parts of the script may rely on it.
        file = chain_label + "_Aligned_Sequences.fasta"
        fileparts = re.split("_", file)
        print("Processsing file: " + file)
        print("PDB: " + pdb_id)
        print("CHAIN: " + chain_id)

        # Structure file for this chain (per the original notes, this is the
        # PDB file handed to DSSP) and its full location on disk.
        searchPDB = chain_label + ".pdb"
        pdbLocation = duncan_structure_path + searchPDB

        # The first entry of the natural-sequence list is treated as the
        # "ancestral" sequence.
        natural_sequences = af.get_natural_sequences_duncan(file)
        ancestor = natural_sequences[0]
        ancestor_length = len(ancestor)
        # Debug aid: print("Ancestor Length: " + str(ancestor_length))

        # Locate the alignment gaps ('-') in the ancestral sequence; they must
        # be taken out before the amino acids are counted.
        gaps = 0
        gap_locations = []  # indices of the gapped alignment columns
        for counter, acid in enumerate(ancestor):
            if acid == '-':
                gaps += 1
                gap_locations.append(counter)
        # An index-driven scan finishes with counter equal to the sequence
        # length; keep that end state for any code that reads it afterwards.
        counter = ancestor_length