##os.chdir(DATADIR) dict_chains_scores = {} out_iter_pdb = list_to_check[it] lab = list_labels[it] if os.path.isfile(os.path.join(DATADIR, out_iter_pdb)): #read pdb structure_instance = PDBParser.read_PDB_file('pdbfile', os.path.join( DATADIR, out_iter_pdb), hetatm=False, water=False) #get scores dict_ch_scores, dict_chain_res = sc.SMOC(emmap, res_map, structure_instance, win, rfilepath, sim_sigma_coeff) else: print 'PDB file not found:', out_iter_pdb if rigid_out: dict_chain_indices, dict_chain_CA = blurrer.get_coordinates( structure_instance) rigidf = open(rigid_out_prefix + '_' + lab, 'w') sum_avg_smoc = 0. ct_chain = 0 for ch in dict_ch_scores: flagch = 1 dict_res_scores = dict_ch_scores[ch] #get res number list (for ref)
def _smoc_residue_scores(reslist, dict_res_scores):
    """Return one score per entry of ``reslist``, in the same order.

    A residue with no entry in ``dict_res_scores`` borrows the score of a
    neighbouring residue in ``reslist`` (the next one for the first
    position, the previous one otherwise). If that neighbour has no score
    either, or does not exist, 0.0 is used.
    """
    scorelist = []
    for pos, res in enumerate(reslist):
        try:
            value = dict_res_scores[res]
        except KeyError:
            neighbour = pos + 1 if pos == 0 else pos - 1
            try:
                value = dict_res_scores[reslist[neighbour]]
            except (KeyError, IndexError):
                value = 0.0
        scorelist.append(value)
    return scorelist


def _smoc_score_bounds(scorelist, low_fraction, high_fraction):
    """Return ``(low, high)`` reference scores picked from the sorted scores.

    ``low`` is the value at index ``int(n * low_fraction)`` and ``high`` the
    value at index ``int(n * (1 - high_fraction))`` of the ascending list.
    A ``low`` of exactly 0.0 is replaced by 0.00001 so that callers can
    divide by it.
    """
    ordered = sorted(scorelist)
    low = ordered[int(len(ordered) * low_fraction)]
    if low == 0.0:
        low = 0.00001
    high = ordered[int(len(ordered) * (1 - high_fraction))]
    return low, high


def score(session, atomic_models, map_model, rigid_filename, rez,
          sim_sigma=0.187, window=9, colour_atoms=True):
    """Compute per-residue SMOC scores of each atomic model against a map.

    Each model's atoms are converted with ``chimera_to_tempy_atom`` and the
    map with ``chimera_to_tempy_map``, then handed to
    ``ScoringFunctions.SMOC``.

    Args:
        session: Not used by this function.
        atomic_models: Iterable of models exposing an ``atoms`` iterable.
        map_model: Map object accepted by ``chimera_to_tempy_map``.
        rigid_filename: Forwarded unchanged to ``SMOC``.
        rez: Forwarded to ``SMOC`` as its second argument (presumably the
            map resolution -- TODO confirm against the TEMPy API).
        sim_sigma: Forwarded to ``SMOC`` as its last argument.
        window: Forwarded to ``SMOC`` as the scoring window.
        colour_atoms: Not used by this function.

    Returns:
        A list with one ``(dict_chains_scores, dict_reslist)`` tuple per
        atomic model, in input order. ``dict_chains_scores`` maps
        chain -> residue -> one-element list holding the score formatted as
        a two-decimal string; ``dict_reslist`` maps chain -> a copy of the
        residue list returned by ``SMOC`` for that chain.
    """
    # TODO - rigid_filename might be optional?
    sc = ScoringFunctions()
    rvals = []

    slow = 0.50
    shigh = 0.25  # fraction of structure fitted reasonably well initially
    # Only one scoring pass is made per model, so every per-residue score
    # list has exactly one slot.
    it = 0

    for atomic_model in atomic_models:
        atomlist = [
            chimera_to_tempy_atom(atom, index)
            for index, atom in enumerate(atomic_model.atoms)
        ]
        bio_atom_structure = BioPy_Structure(atomlist)
        bio_map_structure = chimera_to_tempy_map(map_model)

        dict_reslist = {}
        dict_chains_scores = {}
        dict_ch_scores, dict_chain_res = sc.SMOC(
            bio_map_structure, rez, bio_atom_structure, window,
            rigid_filename, sim_sigma)

        for ch in dict_ch_scores:
            dict_res_scores = dict_ch_scores[ch]
            # Keep a private copy of the residue list as the reference order.
            reslist = dict_chain_res[ch][:]
            dict_reslist[ch] = reslist
            if len(reslist) == 0:
                print('Chain missing:', atomic_model, ch)
                continue

            scorelist = _smoc_residue_scores(reslist, dict_res_scores)

            # Save the scores for this chain as formatted strings.
            chain_scores = {}
            for res, value in zip(reslist, scorelist):
                chain_scores[res] = ["{0:.2f}".format(round(value, 2))]
            dict_chains_scores[ch] = chain_scores

            # Report the low/high reference scores and the chain mean.
            avglow, avghigh = _smoc_score_bounds(scorelist, slow, shigh)
            print(ch, 'avg-top25%, avg-low25%, avg-high/avg-low', avghigh,
                  avglow, avghigh / avglow)
            print(ch, 'avg', sum(scorelist) / len(scorelist))

        # Include SMOC scores as b-factor records.
        # NOTE(review): bio_atom_structure is not returned, so these values
        # only matter if the converted atoms are referenced elsewhere --
        # confirm whether this loop is still needed.
        for x in bio_atom_structure.atomList:
            cur_chain = x.chain
            cur_res = x.get_res_no()
            if cur_chain not in dict_reslist:
                continue
            if cur_chain in dict_chains_scores:
                try:
                    x.temp_fac = dict_chains_scores[cur_chain][cur_res][it]
                except (KeyError, IndexError):
                    print('Residue missing: ', cur_res, cur_chain,
                          atomic_model)
                    x.temp_fac = 0.0
            else:
                x.temp_fac = 0.0

        rvals.append((dict_chains_scores, dict_reslist))

    return rvals
def test_tempy_smoc(self):
    '''Test the TEMPy SMOC score based on the files provided.

    Use this as a baseline for the second ChimeraX test. The procedure is
    taken from the score_smoc.py example tutorial: score '1ake_mdl1.pdb'
    against '1akeA_10A.mrc' (both read from the current working directory)
    and compare the low/high reference scores and the mean score of every
    chain with fixed expected values.
    '''
    # The parser object is not used below; the calls are kept from the
    # tutorial because generate_args() may have side effects -- TODO confirm.
    tp = TempyParser()
    tp.generate_args()

    # the sigma factor determines the width of the Gaussian distribution
    # used to describe each atom
    sim_sigma_coeff = 0.187
    # score window
    win = 9
    path_test = os.getcwd()
    map_file = os.path.join(path_test, '1akeA_10A.mrc')
    res_map = 10.0
    DATADIR = path_test
    list_to_check = ['1ake_mdl1.pdb']

    slow = 0.50
    shigh = 0.25  # fraction of structure fitted reasonably well initially

    rigidbody_file = None
    rfilepath = rigidbody_file
    if rigidbody_file is not None:
        rfilepath = os.path.join(DATADIR, rigidbody_file)

    sc = ScoringFunctions()
    emmap = MapParser.readMRC(map_file)

    # Residue list per chain, taken from the first model and used as the
    # reference order for every model.
    dict_reslist = {}
    # Counts chains that reached the assertions so the test cannot pass
    # without checking anything.
    chains_checked = 0

    # TODO - this whole bit needs a cleanup I think
    for it, out_iter_pdb in enumerate(list_to_check):
        pdb_path = os.path.join(DATADIR, out_iter_pdb)
        # Fail with a clear message instead of continuing without scores.
        self.assertTrue(os.path.isfile(pdb_path),
                        'PDB file not found: ' + out_iter_pdb)

        # read pdb
        structure_instance = PDBParser.read_PDB_file(
            'pdbfile', pdb_path, hetatm=False, water=False)
        # get scores
        dict_ch_scores, dict_chain_res = sc.SMOC(
            emmap, res_map, structure_instance, win, rfilepath,
            sim_sigma_coeff)

        dict_chains_scores = {}
        for ch in dict_ch_scores:
            dict_res_scores = dict_ch_scores[ch]
            # get res number list (for ref)
            if it == 0:
                dict_reslist[ch] = dict_chain_res[ch][:]
            try:
                reslist = dict_reslist[ch]
            except KeyError:
                print('Chain not common:', ch, out_iter_pdb)
                continue
            if len(reslist) == 0:
                print('Chain missing:', out_iter_pdb, ch)
                continue

            chain_scores = dict_chains_scores.setdefault(ch, {})
            scorelist = []
            for pos, res in enumerate(reslist):
                try:
                    value = dict_res_scores[res]
                except KeyError:
                    # Borrow a neighbouring residue's score; fall back to
                    # 0.0 when the neighbour is unscored or absent.
                    neighbour = pos + 1 if pos == 0 else pos - 1
                    try:
                        value = dict_res_scores[reslist[neighbour]]
                    except (KeyError, IndexError):
                        value = 0.0
                scorelist.append(value)

                # save scores for each chain, one slot per checked model
                curscore = "{0:.2f}".format(round(value, 2))
                history = chain_scores.setdefault(
                    res, [str(0.0)] * len(list_to_check))
                history[it] = curscore

            # calculate low and high score bounds from the sorted scores
            list_sccc = sorted(scorelist)
            avglow = list_sccc[int(len(list_sccc) * slow)]
            if avglow == 0.0:
                avglow = 0.00001
            avghigh = list_sccc[int(len(list_sccc) * (1 - shigh))]

            self.assertLess(abs(avghigh - 0.967), 0.01)
            self.assertLess(abs(avglow - 0.956), 0.01)
            self.assertLess(
                abs(sum(scorelist) / len(scorelist) - 0.899), 0.01)
            chains_checked += 1

        # include smoc scores as b-factor records
        for x in structure_instance.atomList:
            cur_chain = x.chain
            cur_res = x.get_res_no()
            if cur_chain not in dict_reslist:
                continue
            if cur_chain in dict_chains_scores:
                try:
                    x.temp_fac = dict_chains_scores[cur_chain][cur_res][it]
                except (KeyError, IndexError):
                    print('Residue missing: ', cur_res, cur_chain,
                          out_iter_pdb)
                    x.temp_fac = 0.0
            else:
                x.temp_fac = 0.0

    self.assertGreater(chains_checked, 0, 'No chain was scored by SMOC')