def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:
        PDBfile - the PDB file to score

    hidden arguments:
        aas.scr, pro.scr, gly.scr - the scoring tables need to be present
        in working directory (they are obtained through load_scores())

    returns:
        (PRO score + GLY score + score of all remaining residues)
        divided by length(chain) and multiplied by 1000.
        0 is returned when the PRO/GLY indices do not match the residue
        numbering of the chain, or when length(chain) is 0.

    Only the first chain of the first model is scored.
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores()  # scoring tables

    # str.rstrip('.pdb') strips any trailing run of the characters
    # '.', 'p', 'd', 'b' (e.g. 'abd.pdb' -> 'a'), so cut the suffix
    # explicitly instead.
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]

    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # list() so that .remove() below works even where range() is lazy
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[len(chain) - 1].id[1]))

    # need to remove pro/gly indices in first/last position
    last = len(chain) - 1
    for special in (pro_list, gly_list):
        if 1 in special:
            special.remove(1)
        if last in special:
            special.remove(last)

    # whatever is neither PRO nor GLY is scored with the generic table
    try:
        for index in pro_list:
            aas_list.remove(index)
        for index in gly_list:
            aas_list.remove(index)
    except ValueError:
        print('incosistency in PDB file - will return score = 0')
        return 0

    proscore = score_help(chain, pro_list, pro)
    glyscore = score_help(chain, gly_list, gly)
    aasscore = score_help(chain, aas_list, aas)
    total = proscore + glyscore + aasscore
    size = length(chain)

    try:
        # float() keeps the normalisation from truncating under integer
        # division; the later revisions of this function accumulate in a
        # float for the same reason.
        return (float(total) / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:
        PDBfile - the PDB file to score

    hidden arguments:
        aas.scr, pro.scr, gly.scr - the scoring tables need to be present
        in working directory (they are obtained through load_scores())

    returns:
        (PRO score + GLY score + score of all remaining residues)
        divided by length(chain) and multiplied by 1000.
        0 is returned when the PRO/GLY indices do not match the residue
        numbering of the chain, or when length(chain) is 0.

    Only the first chain of the first model is scored.
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores()  # scoring tables

    # str.rstrip('.pdb') strips any trailing run of the characters
    # '.', 'p', 'd', 'b' (e.g. 'abd.pdb' -> 'a'), so cut the suffix
    # explicitly instead.
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]

    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # list() so that .remove() below works even where range() is lazy
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[len(chain) - 1].id[1]))

    # need to remove pro/gly indices in first/last position
    last = len(chain) - 1
    for special in (pro_list, gly_list):
        if 1 in special:
            special.remove(1)
        if last in special:
            special.remove(last)

    # whatever is neither PRO nor GLY is scored with the generic table
    try:
        for index in pro_list:
            aas_list.remove(index)
        for index in gly_list:
            aas_list.remove(index)
    except ValueError:
        print('incosistency in PDB file - will return score = 0')
        return 0

    proscore = score_help(chain, pro_list, pro)
    glyscore = score_help(chain, gly_list, gly)
    aasscore = score_help(chain, aas_list, aas)
    total = proscore + glyscore + aasscore
    size = length(chain)

    try:
        # float() keeps the normalisation from truncating under integer
        # division; the later revisions of this function accumulate in a
        # float for the same reason.
        return (float(total) / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:
        PDBfile - the PDB file to score

    hidden arguments:
        one scoring table per residue trimer, read through load_scores()
        from
        /home/marciovm/proteins/bdtrimers/<cur lowercased>/<pre>_<cur>_<pos>.scr
        where pre/cur/pos are the residue names of the previous, current
        and next entry of the chain's child_list.  A trimer for which
        load_scores() returns 0 contributes 0 to the score.

    returns:
        the summed per-residue scores divided by length(chain) and
        multiplied by 1000, or 0 when length(chain) is 0.

    Only the first chain of the first model is scored.
    """
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    table_root = '/home/marciovm/proteins/bdtrimers/'

    # str.rstrip('.pdb') strips any trailing run of the characters
    # '.', 'p', 'd', 'b' (e.g. 'abd.pdb' -> 'a'), so cut the suffix
    # explicitly instead.
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]

    total = float(0)
    size = length(chain)

    for res_index in range(1, size - 2):  # not first or last res
        res = chain.child_list[res_index]
        cur = res.resname
        pre = chain.child_list[res_index - 1].resname
        pos = chain.child_list[res_index + 1].resname

        filename = pre + '_' + cur + '_' + pos + '.scr'
        table_file = table_root + cur.lower() + '/' + filename
        chain_index = res.id[1]

        table = load_scores(table_file)
        if table != 0:  # load_scores() signals "no table" with 0
            total = total + score_help(chain, chain_index, table)

    try:
        return (total / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def score(PDBfile, start_index=1, end_index=9999, smoothed='y',
          mutated_list=' '):
    """
    Calculates the m-score for a given PDB file

    arguments:
        PDBfile      - the PDB file to score
        start_index  - child_list start index of residues to look at
        end_index    - child_list end index (exclusive); clamped to
                       length(chain) - 2
        smoothed     - 'y' selects the '<pre>_<cur>_<pos>.smooth.scr'
                       tables, anything else the plain '.scr' tables
        mutated_list - whitespace separated residue names replacing the
                       residues at start_index .. end_index - 1; blank
                       (the default) scores the residues found in the
                       structure

    hidden arguments:
        one scoring table per residue trimer, read through load_scores()
        from /home/marciovm/proteins/bdtrimers/<cur lowercased>/.
        A trimer for which load_scores() returns 0 contributes 0.

    returns:
        the summed per-residue scores divided by length(chain) and
        multiplied by 1000, or 0 when length(chain) is 0.

    Only the first chain of the first model is scored.  The neighbours
    at res_index - 1 and res_index + 1 are read from child_list, so
    start_index is expected to be at least 1.
    """
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    table_root = '/home/marciovm/proteins/bdtrimers/'
    if smoothed == 'y':
        suffix = '.smooth.scr'
    else:
        suffix = '.scr'

    # str.rstrip('.pdb') strips any trailing run of the characters
    # '.', 'p', 'd', 'b' (e.g. 'abd.pdb' -> 'a'), so cut the suffix
    # explicitly instead.
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]

    total = float(0)
    size = length(chain)
    if end_index > (size - 2):
        end_index = size - 2

    # An empty / all-blank mutated_list carries no replacement residues,
    # so it is treated like the default ' ' rather than indexing into an
    # empty list.
    mutated_form = mutated_list.split()
    use_mutations = len(mutated_form) > 0
    last_offset = end_index - start_index - 1

    for res_index in range(start_index, end_index):
        res = chain.child_list[res_index]
        if not use_mutations:
            # non-mutated score: take the trimer straight from the chain
            cur = res.resname
            pre = chain.child_list[res_index - 1].resname
            pos = chain.child_list[res_index + 1].resname
        else:
            # mutated score: inside the window the names come from
            # mutated_form; at the window edges the outer neighbour is
            # still the residue present in the structure
            offset = res_index - start_index
            cur = mutated_form[offset]
            if offset == 0:
                pre = chain.child_list[res_index - 1].resname
            else:
                pre = mutated_form[offset - 1]
            if offset == last_offset:
                pos = chain.child_list[res_index + 1].resname
            else:
                pos = mutated_form[offset + 1]

        filename = pre + '_' + cur + '_' + pos + suffix
        table_file = table_root + cur.lower() + '/' + filename
        chain_index = res.id[1]

        table = load_scores(table_file)
        if table != 0:  # load_scores() signals "no table" with 0
            total = total + score_help(chain, chain_index, table)

    try:
        return (total / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def compile_trimers():
    """
    Appends backbone dihedral data for every residue trimer found in a
    range of PDB files to per-trimer .dat files.

    hidden arguments:
        sys.argv[1], sys.argv[2] - start (inclusive) and end (exclusive)
            index into the sorted list of '*.ent' files in
            /home/marciovm/proteins/bigdist; end is clamped to the
            number of files
        calc_all_dihedrals() - must be defined in this module

    For each file the chain named by the 5th character of the file name
    is used (' ' when that character is '.').  For every residue between
    the second standard residue and length(chain) + index - 3, the result
    of calc_all_dihedrals(chain, aa_index) is appended, zero-filled to 6
    characters per value, to
    /home/marciovm/proteins/bdtrimers/<cur lowercased>/<pre>_<cur>_<pos>.dat
    (4 values when the result has more than 2 entries, otherwise 2).

    Side effects: changes the working directory, prints progress, and
    appends to / creates the .dat files.  Returns nothing.
    """
    import glob
    import os
    import sys
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    os.chdir('/home/marciovm/proteins/bigdist')  # choose directory

    # list of pdb files w/ chain names.  glob replaces parsing the output
    # of `ls *.ent`, which turned an "ls: no match" error message into
    # bogus file names; sorting keeps the indices stable between runs.
    pdb_files = sorted(glob.glob('*.ent'))

    start = int(sys.argv[1])
    end = min(int(sys.argv[2]), len(pdb_files))

    aas = frozenset(['GLY', 'ALA', 'VAL', 'LEU', 'ILE', 'SER', 'THR',
                     'ASN', 'GLN', 'PHE', 'TYR', 'TRP', 'CYS', 'MET',
                     'PRO', 'ASP', 'GLU', 'LYS', 'ARG', 'HIS'])

    for pdb_index in range(start, end):
        pdb = pdb_files[pdb_index]
        pars = PDBParser(PERMISSIVE=1)
        struct = pars.get_structure(pdb, pdb)
        model = struct.child_list[0]
        if pdb[4] == '.':
            chain_name = ' '
        else:
            chain_name = pdb[4].upper()
        chain = model.child_dict[chain_name]
        print('processing ' + pdb + ' index: ' + str(pdb_index))

        # find first real residue; index ends up one past it, i.e. at
        # what should be the 2nd real residue.  child_list is a list, so
        # running off its end raises IndexError (not KeyError): bound the
        # search by the list itself and skip chains without any standard
        # residue.
        index = None
        for position, residue in enumerate(chain.child_list):
            if residue.resname in aas:
                index = position + 1
                break
        if index is None:
            print('no standard residue found in ' + pdb + ' - skipping')
            continue

        # 2nd to last real residue
        final_res_index = length(chain) + index - 3

        # look at everything in between
        for aa_index in range(index, final_res_index):
            aacur = chain.child_list[aa_index].resname
            aapre = chain.child_list[aa_index - 1].resname
            aapost = chain.child_list[aa_index + 1].resname
            seq = aapre + '_' + aacur + "_" + aapost + '.dat'
            filename = ('/home/marciovm/proteins/bdtrimers/'
                        + aacur.lower() + '/' + seq)
            try:
                file_out = open(filename, 'a')
            except IOError:
                # no directory exists for a non-standard residue name
                print("IOError line 63, nostandard aa name in PDB")
                continue

            try:
                dihedrals = calc_all_dihedrals(chain, aa_index)
                if dihedrals:
                    if len(dihedrals) > 2:
                        fields = (dihedrals[0], dihedrals[1],
                                  dihedrals[2], dihedrals[3])
                    else:
                        fields = (dihedrals[0], dihedrals[1])
                    file_out.write(
                        ' '.join([str(value).zfill(6) for value in fields])
                        + '\n')
            finally:
                # close even when calc_all_dihedrals() raises
                file_out.close()
def score(PDBfile, start_index=1, end_index=9999, smoothed='y',
          mutated_list=' '):
    """
    Calculates the m-score for a given PDB file

    arguments:
        PDBfile      - the PDB file to score
        start_index  - child_list start index of residues to look at
        end_index    - child_list end index (exclusive); clamped to
                       length(chain) - 2
        smoothed     - 'y' selects the '<pre>_<cur>_<pos>.smooth.scr'
                       tables, anything else the plain '.scr' tables
        mutated_list - whitespace separated residue names replacing the
                       residues at start_index .. end_index - 1; blank
                       (the default) scores the residues found in the
                       structure

    hidden arguments:
        one scoring table per residue trimer, read through load_scores()
        from /home/marciovm/proteins/bdtrimers/<cur lowercased>/.
        A trimer for which load_scores() returns 0 contributes 0.

    returns:
        the summed per-residue scores divided by length(chain) and
        multiplied by 1000, or 0 when length(chain) is 0.

    Only the first chain of the first model is scored.  The neighbours
    at res_index - 1 and res_index + 1 are read from child_list, so
    start_index is expected to be at least 1.
    """
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    table_root = '/home/marciovm/proteins/bdtrimers/'
    if smoothed == 'y':
        suffix = '.smooth.scr'
    else:
        suffix = '.scr'

    # str.rstrip('.pdb') strips any trailing run of the characters
    # '.', 'p', 'd', 'b' (e.g. 'abd.pdb' -> 'a'), so cut the suffix
    # explicitly instead.
    if PDBfile.endswith('.pdb'):
        struct_id = PDBfile[:-len('.pdb')]
    else:
        struct_id = PDBfile

    pars = PDBParser(PERMISSIVE=1)
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]

    total = float(0)
    size = length(chain)
    if end_index > (size - 2):
        end_index = size - 2

    # An empty / all-blank mutated_list carries no replacement residues,
    # so it is treated like the default ' ' rather than indexing into an
    # empty list.
    mutated_form = mutated_list.split()
    use_mutations = len(mutated_form) > 0
    last_offset = end_index - start_index - 1

    for res_index in range(start_index, end_index):
        res = chain.child_list[res_index]
        if not use_mutations:
            # non-mutated score: take the trimer straight from the chain
            cur = res.resname
            pre = chain.child_list[res_index - 1].resname
            pos = chain.child_list[res_index + 1].resname
        else:
            # mutated score: inside the window the names come from
            # mutated_form; at the window edges the outer neighbour is
            # still the residue present in the structure
            offset = res_index - start_index
            cur = mutated_form[offset]
            if offset == 0:
                pre = chain.child_list[res_index - 1].resname
            else:
                pre = mutated_form[offset - 1]
            if offset == last_offset:
                pos = chain.child_list[res_index + 1].resname
            else:
                pos = mutated_form[offset + 1]

        filename = pre + '_' + cur + '_' + pos + suffix
        table_file = table_root + cur.lower() + '/' + filename
        chain_index = res.id[1]

        table = load_scores(table_file)
        if table != 0:  # load_scores() signals "no table" with 0
            total = total + score_help(chain, chain_index, table)

    try:
        return (total / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def compile_trimers():
    """
    Appends backbone dihedral data for every residue trimer found in a
    range of PDB files to per-trimer .dat files.

    hidden arguments:
        sys.argv[1], sys.argv[2] - start (inclusive) and end (exclusive)
            index into the sorted list of '*.ent' files in
            /home/marciovm/proteins/bigdist; end is clamped to the
            number of files
        calc_all_dihedrals() - must be defined in this module

    For each file the chain named by the 5th character of the file name
    is used (' ' when that character is '.').  For every residue between
    the second standard residue and length(chain) + index - 3, the result
    of calc_all_dihedrals(chain, aa_index) is appended, zero-filled to 6
    characters per value, to
    /home/marciovm/proteins/bdtrimers/<cur lowercased>/<pre>_<cur>_<pos>.dat
    (4 values when the result has more than 2 entries, otherwise 2).

    Side effects: changes the working directory, prints progress, and
    appends to / creates the .dat files.  Returns nothing.
    """
    import glob
    import os
    import sys
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    os.chdir('/home/marciovm/proteins/bigdist')  # choose directory

    # list of pdb files w/ chain names.  glob replaces parsing the output
    # of `ls *.ent`, which turned an "ls: no match" error message into
    # bogus file names; sorting keeps the indices stable between runs.
    pdb_files = sorted(glob.glob('*.ent'))

    start = int(sys.argv[1])
    end = min(int(sys.argv[2]), len(pdb_files))

    aas = frozenset(['GLY', 'ALA', 'VAL', 'LEU', 'ILE', 'SER', 'THR',
                     'ASN', 'GLN', 'PHE', 'TYR', 'TRP', 'CYS', 'MET',
                     'PRO', 'ASP', 'GLU', 'LYS', 'ARG', 'HIS'])

    for pdb_index in range(start, end):
        pdb = pdb_files[pdb_index]
        pars = PDBParser(PERMISSIVE=1)
        struct = pars.get_structure(pdb, pdb)
        model = struct.child_list[0]
        if pdb[4] == '.':
            chain_name = ' '
        else:
            chain_name = pdb[4].upper()
        chain = model.child_dict[chain_name]
        print('processing ' + pdb + ' index: ' + str(pdb_index))

        # find first real residue; index ends up one past it, i.e. at
        # what should be the 2nd real residue.  child_list is a list, so
        # running off its end raises IndexError (not KeyError): bound the
        # search by the list itself and skip chains without any standard
        # residue.
        index = None
        for position, residue in enumerate(chain.child_list):
            if residue.resname in aas:
                index = position + 1
                break
        if index is None:
            print('no standard residue found in ' + pdb + ' - skipping')
            continue

        # 2nd to last real residue
        final_res_index = length(chain) + index - 3

        # look at everything in between
        for aa_index in range(index, final_res_index):
            aacur = chain.child_list[aa_index].resname
            aapre = chain.child_list[aa_index - 1].resname
            aapost = chain.child_list[aa_index + 1].resname
            seq = aapre + '_' + aacur + "_" + aapost + '.dat'
            filename = ('/home/marciovm/proteins/bdtrimers/'
                        + aacur.lower() + '/' + seq)
            try:
                file_out = open(filename, 'a')
            except IOError:
                # no directory exists for a non-standard residue name
                print("IOError line 63, nostandard aa name in PDB")
                continue

            try:
                dihedrals = calc_all_dihedrals(chain, aa_index)
                if dihedrals:
                    if len(dihedrals) > 2:
                        fields = (dihedrals[0], dihedrals[1],
                                  dihedrals[2], dihedrals[3])
                    else:
                        fields = (dihedrals[0], dihedrals[1])
                    file_out.write(
                        ' '.join([str(value).zfill(6) for value in fields])
                        + '\n')
            finally:
                # close even when calc_all_dihedrals() raises
                file_out.close()