def __init__(self, pdbfile, statistics_collector):
    """Load the energies stored in ``pdbfile`` and reset all result fields.

    Args:
        pdbfile: path passed on to ``PoseEnergies.loadFile``.
        statistics_collector: kept as-is on the instance for later lookups.
    """
    energies = PoseEnergies()
    self.pe_instance = energies
    energies.loadFile(pdbfile)
    self.statistics_collector = statistics_collector

    # Containers that are filled in later.
    self.z_score_dict = {}
    self.z_sorted = []
    self.result = ''

    # Largest positive / negative shift: position and value, all start at 0.0.
    self.position_positive_shift = self.value_pos = 0.0
    self.position_negative_shift = self.value_neg = 0.0
# Two input sets; the second one swaps in the 'test_modified_' variant of 12ca.
FileList = [
    '12as_nohet_1_relax.pdb',
    '12e8_nohet_1_relax.pdb',
    '12ca_nohet_1_relax.pdb',
    '12gs_nohet_1_relax.pdb',
]
FileList_modified = [
    '12as_nohet_1_relax.pdb',
    '12e8_nohet_1_relax.pdb',
    'test_modified_12ca_nohet_1_relax.pdb',
    '12gs_nohet_1_relax.pdb',
]

statistics_collector = ResTypesStatisticsCollector()
statistics_collector_mod = ResTypesStatisticsCollector()

# Feed every file of each list into its own collector (plain list first).
for file_names, collector in ((FileList, statistics_collector),
                              (FileList_modified, statistics_collector_mod)):
    for name in file_names:
        filename = name
        pe_instance = PoseEnergies()
        pe_instance.loadFile(filename)
        collector.add_pose_energies(pe_instance)

# best score terms
# NOTE(review): exact float equality against a literal -- confirm the stored
# value really is exactly -3.333333333 before relying on this check.
best_fa_rep = statistics_collector_mod.restype_av_scores['GLY'].get_best_score('fa_rep')
if float(best_fa_rep[0]) == -3.333333333:
    print('No error seen, when trying to find the best score!')
import sys sys.path.append('../../src') from ResidueEnergies import ResidueEnergies, PoseEnergies,critical_distance_squared from ResTypeAverageScores import ResTypeAverageScores from ResTypesStatisticsCollector import ResTypesStatisticsCollector from constants import * import numpy as np statistics_collector = ResTypesStatisticsCollector() pdbfile1 = "../stddev_and_mean/avetest_mock.pdb" pdbfile2 = "../stddev_and_mean/avetest_mock2.pdb" pe_instance_1 = PoseEnergies() pe_instance_1.loadFile( pdbfile1 ) pe_instance_2 = PoseEnergies() pe_instance_2.loadFile( pdbfile2) statistics_collector.add_pose_energies( pe_instance_1) statistics_collector.add_pose_energies( pe_instance_2) ######################################### error_combined_scored = False #combination of score terms score_terms_to_be_combined = ['faketerm1', 'faketerm2'] for aminoacid in aminoacids:
from ResidueEnergies import ResidueEnergies, PoseEnergies,critical_distance_squared
from ResTypeAverageScores import ResTypeAverageScores
from constants import *
import cPickle
import numpy as np
import sys

#############################################################################
# calculate number of neighbors
#
# Loads one structure and checks the residue type and neighbor count stored at
# two fixed list positions against hard-coded expectations.
filename = '10mh_nohet_1_relax.pdb'
pe_instance = PoseEnergies()
pe_instance.loadFile(filename)

# (index into res_e_list, expected res_type, expected number_of_neighbors)
expected_neighbors = ((209, 'LYS', 8), (117, 'MET', 16))

error4_seen = False
for index, res_type, neighbor_count in expected_neighbors:
    residue = pe_instance.res_e_list[index]
    if residue.res_type != res_type or residue.number_of_neighbors != neighbor_count:
        # Stop at the first mismatch; later entries are not inspected.
        error4_seen = True
        break

if error4_seen:
    print('ERROR: Error in counting neighbors')
else:
    print('No error when counting neighbors')
class ZScoreCalculator(object):
    """Per-residue z-scores for one structure, plus comparison of two structures.

    A z-score is ``(score - mean) / stddev`` where ``mean`` and ``stddev`` come
    from ``statistics_collector.get_mean_and_stddev(res_type, score_term,
    number_of_neighbors)``.
    """

    def __init__(self, pdbfile, statistics_collector):
        """Load the energies of ``pdbfile`` and reset all result fields.

        Args:
            pdbfile: path passed on to ``PoseEnergies.loadFile``.
            statistics_collector: object providing ``get_mean_and_stddev``.
        """
        self.pe_instance = PoseEnergies()
        self.pe_instance.loadFile(pdbfile)
        self.statistics_collector = statistics_collector
        # res_num -> [zscore, res_type, mean, stddev, score, number_of_neighbors]
        self.z_score_dict = {}
        self.position_positive_shift = 0.0
        self.value_pos = 0.0
        self.position_negative_shift = 0.0
        self.value_neg = 0.0
        # Stays '' until calculate_differences_in_z_scores stores a list of
        # [res_num, delta, mutation_flag] rows on the compared instance.
        self.result = ''
        # Rows of [res_num, res_type, rounded zscore], highest z-score first.
        self.z_sorted = []

    @staticmethod
    def _raw_score(score_dict, score_term):
        """Return the score for ``score_term``; 'a+b' adds, 'a-b' subtracts."""
        if '+' in score_term:
            first, second = score_term.split("+", 1)
            # The original subtracted here as well, which made 'a+b' behave
            # exactly like 'a-b'.
            return score_dict[first] + score_dict[second]
        if '-' in score_term:
            first, second = score_term.split("-", 1)
            return score_dict[first] - score_dict[second]
        return score_dict[score_term]

    def calculate_z_scores(self, score_term):
        """Fill ``z_score_dict`` and ``z_sorted`` for ``score_term``.

        Args:
            score_term: a key of each residue's ``score_dict``, or two keys
                joined by '+' or '-' to combine them.

        Returns:
            ``self.z_score_dict`` (res_num -> [zscore, res_type, mean, stddev,
            score, number_of_neighbors]).

        Raises:
            KeyError: if a score term is missing from a residue's score_dict.
            ZeroDivisionError: if a plain-float stddev of 0 is returned by the
                statistics collector.
        """
        # Start from a clean list so repeated calls do not pile up duplicates.
        self.z_sorted = []
        for residue in self.pe_instance.res_e_list:
            score = self._raw_score(residue.score_dict, score_term)
            stats = self.statistics_collector.get_mean_and_stddev(
                residue.res_type, score_term, residue.number_of_neighbors)
            mean = stats[0]
            stddev = stats[1]
            zscore = (score - mean) / stddev
            self.z_score_dict[residue.res_num] = [
                zscore, residue.res_type, mean, stddev, score,
                residue.number_of_neighbors]
        # Entries are lists, so ordering is by z-score first.
        ordered = sorted(self.z_score_dict, key=self.z_score_dict.get,
                         reverse=True)
        for key in ordered:
            entry = self.z_score_dict[key]
            self.z_sorted.append([key, entry[1], round(entry[0], 4)])
        return self.z_score_dict

    def calculate_differences_in_z_scores(self, other_instance, score_term):
        """Compare ``other_instance`` against this instance residue by residue.

        Both instances get their z-scores (re)computed. For every residue the
        difference ``other - self`` is stored; when the residue type is
        unchanged but the looked-up mean differs, the other z-score is first
        recomputed (in place, inside ``other_instance.z_score_dict``) with this
        instance's mean and stddev.

        The extreme shifts and the sorted result list are written to
        ``other_instance`` (``position_positive_shift``, ``value_pos``,
        ``position_negative_shift``, ``value_neg``, ``result``).

        Args:
            other_instance: the ZScoreCalculator to compare with.
            score_term: see ``calculate_z_scores``.

        Returns:
            ``other_instance.result``: rows of [res_num, rounded delta,
            '-' or 'mutation'], largest delta first.

        Raises:
            SystemExit: if the two structures have different residue counts.
            KeyError: if a residue number is missing in ``other_instance``.
        """
        if len(self.pe_instance.res_e_list) != len(other_instance.pe_instance.res_e_list):
            sys.exit('ERROR: The compared sequences do not have the same length!')
        self.calculate_z_scores(score_term)
        other_instance.calculate_z_scores(score_term)

        delta_z_scores = {}
        # Walk the residue numbers that actually exist. The original probed
        # str(0) .. str(len - 1), which misses the last residue whenever the
        # numbering starts at 1.
        for res_num in self.z_score_dict:
            sel = self.z_score_dict[res_num]
            other = other_instance.z_score_dict[res_num]
            mutation = '-'
            if sel[1] != other[1]:
                mutation = 'mutation'
            if sel[2] != other[2] and mutation == '-':
                # Same residue type but different reference statistics:
                # re-express the other score with this instance's mean/stddev.
                other[0] = (other[4] - sel[2]) / sel[3]
            # str() keeps the keys joinable in get_goodz/get_badz.
            delta_z_scores[str(res_num)] = (other[0] - sel[0]), mutation

        other_instance.result = []
        if not delta_z_scores:
            # Nothing to compare: leave the shift fields untouched.
            return other_instance.result

        other_instance.position_positive_shift = max(delta_z_scores, key=delta_z_scores.get)
        other_instance.value_pos = delta_z_scores[other_instance.position_positive_shift][0]
        other_instance.position_negative_shift = min(delta_z_scores, key=delta_z_scores.get)
        other_instance.value_neg = delta_z_scores[other_instance.position_negative_shift][0]

        for key in sorted(delta_z_scores, key=delta_z_scores.get, reverse=True):
            delta, mutation = delta_z_scores[key]
            other_instance.result.append([key, round(delta, 4), mutation])
        return other_instance.result

    def get_goodz(self, goodz):
        """Print a 'select goodz' line listing residues with delta <= ``goodz``."""
        selected = [entry[0] for entry in self.result if entry[1] <= goodz]
        # Single-argument parenthesised form prints the same on Python 2.
        print('select goodz, resi %s' % '+'.join(selected))

    def get_badz(self, badz):
        """Print a 'select badz' line listing residues with delta >= ``badz``."""
        selected = [entry[0] for entry in self.result if entry[1] >= badz]
        # Single-argument parenthesised form prints the same on Python 2.
        print('select badz, resi %s' % '+'.join(selected))
#!/usr/bin/python
import cPickle
import sys
sys.path.append('../src')  # must run before the project imports below
from ResidueEnergies import ResidueEnergies, PoseEnergies
from ResTypeAverageScores import ResTypeAverageScores
import numpy as np
from ResTypesStatisticsCollector import ResTypesStatisticsCollector

# Three-letter residue type codes.
aminoacids = [
    'ALA', 'CYS', 'ASP', 'GLU', 'PHE',
    'GLY', 'HIS', 'ILE', 'LYS', 'LEU',
    'MET', 'ASN', 'PRO', 'GLN', 'ARG',
    'SER', 'THR', 'VAL', 'TRP', 'TYR',
]

# Stand-alone PoseEnergies instance loaded from the first mock file.
pose_energies = PoseEnergies()
pose_energies.loadFile('stddev_and_mean/avetest_mock.pdb')

statistics_collector = ResTypesStatisticsCollector()

pdbfile1 = "stddev_and_mean/avetest_mock.pdb"
pdbfile2 = "stddev_and_mean/avetest_mock2.pdb"

pe_instance_1 = PoseEnergies()
pe_instance_1.loadFile(pdbfile1)
pe_instance_2 = PoseEnergies()
pe_instance_2.loadFile(pdbfile2)

# Register both loaded structures with the collector, first file first.
for loaded_pose in (pe_instance_1, pe_instance_2):
    statistics_collector.add_pose_energies(loaded_pose)