def map_contour(m, t=-1.):
    """Read an MRC map and optionally derive a contour level for it.

    :param m: path to the map file; its basename up to the first '.' is
        returned as the map name.
    :param t: multiplier applied to the sigma value returned by
        ``emmap._peak_density()``. The default ``-1.0`` disables the
        contour calculation.
    :return: tuple ``(name, emmap, contour)``. ``contour`` is ``None`` when
        ``t`` is ``-1.0``, ``0.0`` when no peak is reported, and
        ``zeropeak + t * sigma`` otherwise.
    """
    map_name = os.path.basename(m).split('.')[0]
    emmap = MapParser.readMRC(m)

    # Contouring was not requested: hand back the map untouched.
    if t == -1.0:
        return map_name, emmap, None

    zeropeak, _ave, sigma1 = emmap._peak_density()
    contour = 0.0 if zeropeak is None else zeropeak + (t * sigma1)
    return map_name, emmap, contour
def test_tempy_sccc(self):
    '''
    Check the TEMPy SCCC score of every rigid-body segment against the
    stored reference values. Serves as the baseline for the second
    chimeraX test.
    '''
    # the sigma factor determines the width of the Gaussian distribution
    # used to describe each atom
    sim_sigma_coeff = 0.187
    path_test = "./"
    m = os.path.join(path_test, '1akeA_10A.mrc')
    p = os.path.join(path_test, '1ake_mdl1.pdb')
    r = 10.0
    rb_file = os.path.join(path_test, '1ake_mdl1_rigid.txt')

    scorer = ScoringFunctions()
    # map, model, then the rigid-body segmentation of that model
    emmap = MapParser.readMRC(m)
    structure_instance = PDBParser.read_PDB_file(
        'pdbfile', p, hetatm=False, water=False)
    listRB = RBParser.read_FlexEM_RIBFIND_files(rb_file, structure_instance)

    # one SCCC score per rigid-body segment
    listsc_sccc = [
        scorer.SCCC(emmap, r, sim_sigma_coeff, structure_instance, RB)
        for RB in listRB
    ]

    self.assertTrue(len(listRB) == 6)
    reference_scores = (0.954, 0.427, 0.624, 0.838, 0.971, 0.928)
    for position, reference in enumerate(reference_scores):
        self.assertTrue(
            abs(round(listsc_sccc[position], 4) - reference) < 0.01)
def _ccc(self, mapname, modelname, res):
    """Score a model against a map with ``ScoringFunctions.CCC_map``.

    :param mapname: map file name, resolved relative to "./".
    :param modelname: PDB file name, resolved relative to "./".
    :param res: resolution passed to the blurrer; also selects the
        multiplier used for the model contour.
    :return: tuple ``(ccc, ovr)``: the first value of the ``CCC_map`` call
        made without a mode argument and the second value of the call made
        with mode ``3``.
    """
    path_test = "./"
    emmap1 = MapParser.readMRC(os.path.join(path_test, mapname))
    structure_instance = PDBParser.read_PDB_file(
        'pdbfile', os.path.join(path_test, modelname),
        hetatm=False, water=False)
    blurrer = StructureBlurrer()

    # map contour: zero peak plus 1.5 sigma; stays None if no peak is found
    sigma_factor = 1.5
    c1 = None
    zeropeak, _ave, sigma1 = emmap1._peak_density()
    if zeropeak is not None:
        c1 = zeropeak + (sigma_factor * sigma1)

    # multiplier for the model contour, chosen from the resolution band
    if res > 20.0:
        mt = 2.0
    elif res > 10.0:
        mt = 1.0
    elif res > 6.0:
        mt = 0.5
    else:
        mt = 0.1

    emmap2 = blurrer.gaussian_blur_real_space(
        structure_instance, res, sigma_coeff=0.187, densMap=emmap1,
        normalise=True)
    # model contour, taken from the simulated map
    c2 = mt * emmap2.std()

    sc = ScoringFunctions()
    _, ovr = sc.CCC_map(emmap1, emmap2, c1, c2, 3, cmode=False)
    ccc, _ = sc.CCC_map(emmap1, emmap2, c1, c2, cmode=False)
    print("Printing CCC", ccc, ovr, c1, c2)
    return (ccc, ovr)
def generate_csv_file(workingDir):
    """Write the training-set CSV of features for the simulated EM maps.

    Walks ``<workingDir>/simulated/EM``, visiting only sub-directories whose
    name is in ``aAList``. For each file found it reads the map, computes
    ``features(emMap)`` and writes one row ``[sub-directory name] + features``
    to ``<workingDir>/simulated/training_set.csv``. A progress line is
    rewritten in place on stdout after every file.

    :param workingDir: root directory containing ``simulated/EM``.
    :return: None
    """
    emDirectory = workingDir + "/simulated/EM"
    numFiles = numOfFilesSubdir(emDirectory)
    currFileNum = 0

    # generate CSV file of training set
    # print() with a single argument behaves the same on Python 2 and 3
    print('Generate CSV file of features...')
    csv_file_path = workingDir + "/simulated/training_set.csv"
    with open(csv_file_path, "w") as csvFile:
        csvFileWriter = csv.writer(csvFile, delimiter=',', quotechar='"',
                                   quoting=csv.QUOTE_MINIMAL)
        for aaDirName in os.listdir(emDirectory):
            if aaDirName not in aAList:
                continue
            emAaDir = "{0}/{1}".format(emDirectory, aaDirName)
            for emfileName in os.listdir(emAaDir):
                emFilePath = "{0}/{1}".format(emAaDir, emfileName)
                emMap = MapParser.readMRC(emFilePath)
                csvFileWriter.writerow([aaDirName] + features(emMap))
                currFileNum += 1
                # '\r' returns to the start of the line so the progress
                # text overwrites itself instead of scrolling
                sys.stdout.write(
                    '\r{:4}/{:4} ({:5.4}%), current file: {}'.format(
                        currFileNum, numFiles,
                        currFileNum*100./numFiles, emFilePath))
                sys.stdout.flush()
    # terminate the progress line (was the Python 2 statement `print '\n',`)
    sys.stdout.write('\n')
def findMaxEMMapDimensions(workingDir):
    """
    Find the maximal EM map dimensions

    Scans ``<workingDir>/simulated/EM``, visiting only sub-directories whose
    name is in ``aAList``, and reads every file in them as an MRC map.

    :param workingDir: root directory containing ``simulated/EM``.
    :return: three-element list holding, per axis, the largest value seen
        in ``box_size()`` over all maps; ``[0, 0, 0]`` if no map was read.
    """
    # print() with a single argument behaves the same on Python 2 and 3
    print('Finding the maximal EM map dimensions...')
    emDirectory = workingDir + "/simulated/EM"
    maxEmMapSize = [0, 0, 0]
    for aaDirName in os.listdir(emDirectory):
        if aaDirName not in aAList:
            continue
        emAaDir = "{0}/{1}".format(emDirectory, aaDirName)
        for emfileName in os.listdir(emAaDir):
            emFilePath = "{0}/{1}".format(emAaDir, emfileName)
            emMapSize = MapParser.readMRC(emFilePath).box_size()
            # keep the running per-axis maximum
            for axis in range(3):
                if emMapSize[axis] > maxEmMapSize[axis]:
                    maxEmMapSize[axis] = emMapSize[axis]
    return maxEmMapSize
#emmap = blurrer.gaussian_blur(structure_instance, res,densMap=emmap_1,normalise=True) modelmap = blurrer.gaussian_blur_real_space(structure_instance, res,sigma_coeff=0.187,densMap=emmap,normalise=True) return pName,modelmap, structure_instance #GET INPUT DATA output_synthetic_map = False if flag_example: m1 = os.path.join(path_example,'emd_1046.map') m2 = os.path.join(path_example,'emd_1047_resampled_1046.mrc') r1 = 23.5 r2 = 14.5 Name1 = os.path.basename(m1).split('.')[0] Name2 = os.path.basename(m2).split('.')[0] c1 = 0.0607 c2 = 0.0597 emmap1=MapParser.readMRC(m1) emmap2=MapParser.readMRC(m2) emmap1.fullMap = (emmap1.fullMap-emmap1.mean())#emmap1.mean())/emmap1.std() flag_filt = False sw = 0.001 elif all(x is None for x in [m,m1,m2]): # for 2 models if None in [p1,p2]: sys.exit('Input two maps or a map and model, map resolution(s) (required) and contours (optional)') r1 = r2 = r = 4.0 Name1,emmap1,c1,p1inst = model_contour(p1,res=4.0,emmap=False,t=0.1) if c2 is None: Name2,emmap2,c2,p2inst = model_contour(p2,res=r,emmap=False,t=0.1) else: p2Name,emmap2 = blur_model(p2,res=r,emmap=False) flag_filt = False flag_scale = False elif None in [m1,m2]:
path_out='Test_Files' if os.path.exists(path_out)==True: print "%s exists" %path_out else: os.mkdir(path_out) os.chdir(path_out) structure_instance=PDBParser.read_PDB_file('1J6Z','1J6Z.pdb',hetatm=False,water=False) print structure_instance blurrer = StructureBlurrer() EnsembleGeneration=EnsembleGeneration() scorer = ScoringFunctions() map_target=MapParser.readMRC('emd_5168_monomer.mrc') #read target map map_probe = blurrer.gaussian_blur(structure_instance, 6.6,densMap=map_target)#create a simulated map from the structure instance #Create a Random ensemble of 10 structures randomly within 5 A translation and 60 deg rotation. list_rotate_models=EnsembleGeneration.randomise_structs(structure_instance, 10, 5, 60, v_grain=30, rad=False,write=True) #CCC score from starting fit line='%s %s\n'%('1J6Z',scorer.CCC(map_probe,map_target)) count=0 #loop to score each of the alternative fits in the ensemble for mod in list_rotate_models: count+=1 mod_name=mod[0] mod_structure_instance=mod[1] map_probe = blurrer.gaussian_blur(mod_structure_instance, 6.6,densMap=map_target,sigma_coeff=0.187)
flag_example = True #GET INPUT DATA if flag_example: m = os.path.join(path_example, '1akeA_10A.mrc') p = os.path.join(path_example, '1ake_mdl1.pdb') r = 10.0 rb_file = os.path.join(path_example, '1ake_mdl1_rigid.txt') elif None in [m1, m2]: # for one map and model m = tp.args.inp_map if m is None: m = m1 assert os.path.isfile(m) print 'reading map' Name1 = os.path.basename(m).split('.')[0] emmap1 = MapParser.readMRC(m) if r1 is None and r is None: sys.exit('Input a map and model, map resolution (required)') elif r1 is None: r1 = r if all(x is None for x in [p, p1, p2]): sys.exit('Input a map and model, map resolution (required)') elif None in [p1, p2]: p = tp.args.pdb else: sys.exit('Input a map and model, map resolution (required)') rb_file = tp.args.rigidfile if rb_file is None: sys.exit('Rigid body file missing') # make class instances for density simulation (blurring), scoring and plot scores blurrer = StructureBlurrer()
# Read pdb
try:
    prot = PDBParser.read_PDB_file(input_config["--outdir"],
                                   input_config["--ipdb"],
                                   hetatm=False, water=False)
    # Get number of components
    comps = prot.split_into_chains()
    ncomp = len(comps)
except Exception:
    # `except Exception` (not a bare except) lets Ctrl-C / SystemExit through
    print("Error in reading pdb coordinate file\n")
    usage()
    # NOTE(review): exits with status 0 even though this is an error path;
    # kept as-is because callers may depend on it - confirm.
    exit(0)

# Read and normalise the density map
try:
    emmap = MapParser.readMRC(input_config["--imap"])
    emmap.normalise()
except Exception:
    print("Error in reading map density file\n")
    usage()
    exit(0)

# Read the map resolution; a value of zero is rejected
try:
    res = float(input_config["--res"])
    if res == 0.0:
        # an explicit exception replaces the original bare `raise`, which
        # only worked because it failed with "no active exception"
        raise ValueError("map resolution must be non-zero")
except Exception:
    print("Error in reading map resolution value\n")
    usage()
    exit(0)
#Set the machine names for parallel processing
def transform_map(self, matR, transvec, m1, m2, c1, c2):
    """Transform the second map, bring both maps onto one grid and score them.

    The map read from ``m2`` is rotated with the transpose of ``matR`` and
    shifted by ``transvec`` (scaled by that map's ``apix``). Both maps are
    then cropped at their contours, interpolated onto a common alignment
    box and scored.

    :param matR: rotation matrix; its transpose (``matR.T``) is what is
        passed to the map transform helpers.
    :param transvec: indexable with at least three translation components.
    :param m1: path of the reference map.
    :param m2: path of the map to transform.
    :param c1: contour level for the first map.
    :param c2: contour level for the second map.
    :return: tuple ``(ccc, mi, env, nv, nv_s)`` from ``CCF_mask_zero``,
        ``MI``, ``map_envelope_score``, ``normal_vector_score`` and
        ``normal_vector_score`` with ``Filter='Sobel'``.
    """
    mat = matR.T
    emmap1 = MapParser.readMRC(m1)
    emmap2 = MapParser.readMRC(m2)
    # geometric centre of map
    vec_centre = emmap2.centre()
    spacing = emmap2.apix
    # to work on the box transformations, get the box centre irrespective
    # of origin
    vec_centre.x = vec_centre.x - emmap2.x_origin()
    vec_centre.y = vec_centre.y - emmap2.y_origin()
    vec_centre.z = vec_centre.z - emmap2.z_origin()
    # calculate new box dimensions, after rotation
    new_centre = emmap2._box_transform(matR)
    output_shape = (int(new_centre.x / spacing),
                    int(new_centre.y / spacing),
                    int(new_centre.z / spacing))
    # halve the new box extents to get its centre
    new_centre.x = new_centre.x / 2
    new_centre.y = new_centre.y / 2
    new_centre.z = new_centre.z / 2
    # offset for rotation
    offset = emmap2._rotation_offset(mat, vec_centre, new_centre)
    # APPLY ROTATION
    emmap2 = emmap2._matrix_transform_offset(mat, output_shape,
                                             offset.x, offset.y, offset.z)
    offset_x = new_centre.x - vec_centre.x
    offset_y = new_centre.y - vec_centre.y
    offset_z = new_centre.z - vec_centre.z
    emmap2 = emmap2.shift_origin(-offset_x, -offset_y, -offset_z)
    # TRANSLATION COMPONENT
    a14, a24, a34 = transvec[0], transvec[1], transvec[2]
    emmap_2 = emmap2.shift_origin(float(a14) * spacing,
                                  float(a24) * spacing,
                                  float(a34) * spacing)
    emmap_1 = emmap1.copy()
    # CROP BOX TO REDUCE ARRAY SIZE
    emmap_1._crop_box(c1, 2)
    emmap_2._crop_box(c2, 2)
    # DETERMINE A COMMON ALIGNMENT BOX (use the larger of the two spacings)
    spacing = emmap_2.apix
    if emmap_2.apix < emmap_1.apix:
        spacing = emmap_1.apix
    grid_shape, new_ori = emmap_1._alignment_box(emmap_2, spacing)
    # INTERPOLATE TO NEW GRID
    emmap_1 = emmap_1._interpolate_to_grid(grid_shape, spacing, new_ori)
    emmap_2 = emmap_2._interpolate_to_grid(grid_shape, spacing, new_ori)

    sc = ScoringFunctions()
    ccc = sc.CCF_mask_zero(emmap_1, emmap_2, c1, c2)
    mi = sc.MI(emmap_1, emmap_2)
    env = sc.map_envelope_score(emmap_1, emmap_2, c1, c2)
    # band of +/- 5% of the reference map's std around the contour c1
    band = emmap1.std() * 0.05
    nv = sc.normal_vector_score(emmap_1, emmap_2,
                                float(c1) - band, float(c1) + band)
    # The Sobel-filtered score gets its own name: it used to overwrite `nv`
    # and leave `nv_s` undefined, so the return raised NameError.
    nv_s = sc.normal_vector_score(emmap_1, emmap_2,
                                  float(c1) - (emmap1.std() * 0.05),
                                  float(c1) + (emmap1.std() * 0.05),
                                  Filter='Sobel')
    return ccc, mi, env, nv, nv_s
from TEMPy.StructureParser import PDBParser
from TEMPy.MapParser import MapParser
import numpy as np
import TEMPy.Vector as Vector

# Point the rotations are performed about.
# tempy examples use COM from input structure
# rotating against 0 0 0 doesn't seem to work
com = Vector.Vector(90, 90, 90)

# read target map
target_map = MapParser.readMRC('GLIC_pH5_half1_unfil.mrc')

# Rotations applied in order about x, y, z: (axis, angle in radians).
# Angles are converted with np.rad2deg before being passed on.
rotation_steps = (
    ((1, 0, 0), -3.1396619777494124),
    ((0, 1, 0), 0.0005038746980934731),
    ((0, 0, 1), 2.125868534775962),
)
for (axis_x, axis_y, axis_z), angle_rad in rotation_steps:
    target_map = target_map.rotate_by_axis_angle(
        axis_x, axis_y, axis_z, np.rad2deg(angle_rad), com)

# translate along x, y, z
target_map = target_map.translate(-42, -58, 5)

# write the transformed map out as an MRC file
target_map.write_to_MRC_file('moved.mrc')
def test_tempy_smoc(self):
    ''' Test the tempy smoc score based on the files provided. Use this as a baseline for the second chimeraX test.

    It is taken straight from the score_smoc.py example tutorial.

    Scores '1ake_mdl1.pdb' against '1akeA_10A.mrc' (both looked up in the
    current working directory) with ScoringFunctions.SMOC and checks the
    sorted-score bounds and the mean score against stored reference values.
    '''
    list_labels = []

    tp = TempyParser()
    tp.generate_args()
    # the sigma factor determines the width of the Gaussian distribution used to describe each atom
    sim_sigma_coeff = 0.187
    #score window
    win = 9

    path_test = os.getcwd()
    map_file = os.path.join(path_test, '1akeA_10A.mrc')
    res_map = 10.0
    DATADIR = path_test
    list_to_check = ['1ake_mdl1.pdb']

    # default labels: file names up to the first '.'
    if len(list_labels) == 0:
        list_labels = [x.split('.')[0]
                       for x in list_to_check]  #['initial','final']
    list_styles = [
        ':', '-.', '--', '-', '-', ':', '-.', '--', '-', '-', ':', '-.',
        '--', '-', '-', ':', '-.', '--', '-', '-', ':', '-.', '--', '-', '-'
    ]  #'--'
    z_score_check = 2

    def model_tree(list_coord1, distpot=3.5, list_coord2=None):
        # Helper carried over from the tutorial; it is not called below.
        # Builds a KD-tree over list_coord2 and returns, for each point in
        # list_coord1, the neighbours within distpot.
        try:
            from scipy.spatial import cKDTree
            coordtree = cKDTree(list_coord2)
        except ImportError:
            from scipy.spatial import KDTree
            # was `list_coord12`, an undefined name (NameError)
            coordtree = KDTree(list_coord2)
        # identity test: `!= None` is evaluated elementwise on numpy arrays
        if list_coord2 is not None:
            neigh_points = coordtree.query_ball_point(list_coord1, distpot)
        return neigh_points

    start_pdb = list_to_check[0]
    iter_num = len(list_to_check)
    intermed_file = ""
    # positions (as fractions of the sorted score list) used for the low
    # and high score bounds below
    slow = 0.50
    shigh = 0.25  # fraction of structure fitted reasonably well initially
    rigidbody_file = None

    sc = ScoringFunctions()
    emmap = MapParser.readMRC(map_file)

    rfilepath = rigidbody_file
    dict_str_scores = {}
    if rigidbody_file is not None:
        rfilepath = os.path.join(DATADIR, rigidbody_file)
    list_zscores = []
    curdir = os.getcwd()
    rerun_ct = 0
    flag_rerun = 0
    it = 0
    dict_reslist = {}

    # TODO - this whole bit needs a cleanup I think
    # NOTE(review): the nesting of the statistics and asserts inside the
    # per-chain loop is reconstructed from the tutorial layout - confirm.
    while iter_num > 0:
        dict_chains_scores = {}
        out_iter_pdb = list_to_check[it]
        lab = list_labels[it]
        if os.path.isfile(os.path.join(DATADIR, out_iter_pdb)):
            #read pdb
            structure_instance = PDBParser.read_PDB_file(
                'pdbfile',
                os.path.join(DATADIR, out_iter_pdb),
                hetatm=False,
                water=False)
            #get scores
            dict_ch_scores, dict_chain_res = sc.SMOC(
                emmap, res_map, structure_instance, win, rfilepath,
                sim_sigma_coeff)
        else:
            # NOTE(review): execution continues after this message, so the
            # loop below fails on the unset dict_ch_scores if the first
            # file is missing.
            print('PDB file not found:', out_iter_pdb)

        for ch in dict_ch_scores:
            flagch = 1
            dict_res_scores = dict_ch_scores[ch]
            #get res number list (for ref)
            if it == 0:
                dict_reslist[ch] = dict_chain_res[ch][:]
            try:
                if len(dict_reslist[ch]) == 0:
                    print('Chain missing:', out_iter_pdb, ch)
                    flagch = 0
                    continue
            except KeyError:
                print('Chain not common:', ch, out_iter_pdb)
                flagch = 0
                continue
            try:
                reslist = dict_reslist[ch]
            except KeyError:
                print('Chain not common:', ch, out_iter_pdb)
                flagch = 0
                continue
            if not ch in dict_chains_scores:
                dict_chains_scores[ch] = {}
            scorelist = []
            for res in reslist:
                try:
                    scorelist.append(dict_res_scores[res])
                except KeyError:
                    # residue has no score: borrow the next residue's score
                    # for the first residue, otherwise the previous one's
                    if reslist.index(res) <= 0:
                        scorelist.append(
                            dict_res_scores[reslist[reslist.index(res) + 1]])
                    else:
                        try:
                            scorelist.append(
                                dict_res_scores[reslist[reslist.index(res) -
                                                        1]])
                        except IndexError:
                            scorelist.append(0.0)
                #save scores for each chain
                curscore = "{0:.2f}".format(round(scorelist[-1], 2))
                try:
                    dict_chains_scores[ch][res][it] = str(curscore)
                except KeyError:
                    dict_chains_scores[ch][res] = [str(0.0)
                                                   ] * len(list_to_check)
                    dict_chains_scores[ch][res][it] = str(curscore)

            dict_str_scores[lab] = dict_chains_scores

            #calc ratio between current and prev scores
            if it > 0:
                score_cur = scorelist[:]
                score_inc = [(1 + x) / (1 + y)
                             for x, y in zip(score_cur, score_prev)][:]
                score_diff = [(x - y)
                              for x, y in zip(score_cur, score_prev)][:]
            #calculate z-scores
            npscorelist = np.array(scorelist)
            try:
                list_zscores.append((npscorelist - np.mean(npscorelist)) /
                                    np.std(npscorelist))
            except Exception:
                list_zscores.append((npscorelist - np.mean(npscorelist)))
            #calculate low and high score bounds
            list_sccc = scorelist[:]
            score_prev = scorelist[:]
            list_sccc.sort()

            # scores at the `slow` and `1 - shigh` positions of the sorted list
            avglow = list_sccc[int(len(list_sccc) * slow)]
            if avglow == 0.0:
                # avoid dividing by zero in the ratio below
                avglow = 0.00001
            avghigh = list_sccc[int(len(list_sccc) * (1 - shigh))]
            if it == 0:
                avghigh1 = list_sccc[int(len(list_sccc) * (1 - shigh))]
            curratio = avghigh / avglow

            self.assertTrue(abs(avghigh - 0.967) < 0.01)
            self.assertTrue(abs(avglow - 0.956) < 0.01)
            self.assertTrue(
                abs(sum(scorelist) / len(scorelist) - 0.899) < 0.01)

        #include smoc scores as b-factor records
        for x in structure_instance.atomList:
            cur_chain = x.chain
            cur_res = x.get_res_no()
            if not cur_chain in dict_reslist.keys():
                continue
            if cur_chain in dict_chains_scores.keys():
                try:
                    x.temp_fac = dict_chains_scores[cur_chain][cur_res][it]
                except Exception:
                    # `ch` here is whatever the chain loop above left behind
                    print('Residue missing: ', cur_res, ch, out_iter_pdb)
                    x.temp_fac = 0.0
            else:
                x.temp_fac = 0.0

        it = it + 1
        iter_num = iter_num - 1
def test_tempy_nmi(self):
    '''
    Test the tempy nmi score based on the files provided. Use this as a baseline for the second chimeraX test.

    Two comparisons are checked against stored reference values:
    map vs map ('emd_5168.map' / 'emd_5170.map') and map vs a map simulated
    from a model ('emd_5168_monomer.mrc' / '1J6Z.pdb').
    '''
    path_test = "./"
    m = os.path.join(path_test, 'emd_5168.map')
    p = os.path.join(path_test, 'emd_5170.map')
    sc = ScoringFunctions()
    rez1 = 6.6
    rez2 = 15.0
    Name1, emmap1, c1 = map_contour(m, t=1.5)
    Name2, emmap2, c2 = map_contour(p, t=1.5)
    print(rez1, rez2, c1, c2, emmap1.apix, emmap2.apix)

    # Maps that mapComparison rejects are cropped and matched onto one
    # grid; the map with the better resolution value is low-pass filtered
    # first when the resolutions differ by more than a factor of 1.25.
    if not sc.mapComparison(emmap1, emmap2):
        emmap1._crop_box(c1, 0.5)
        emmap2._crop_box(c2, 0.5)
        if rez1 > 1.25 * rez2:
            emmap_2 = lpfilter(emmap2, rez1)
            emmap1, emmap2 = match_grid(emmap1, emmap_2, c1, c2)
        elif rez2 > 1.25 * rez1:
            emmap_1 = lpfilter(emmap1, rez2)
            emmap1, emmap2 = match_grid(emmap_1, emmap2, c1, c2)
        else:
            emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

    nmi = 0
    try:
        nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
        if nmi < 0.0:
            nmi = 0.0
    except Exception:
        # Print the traceback BEFORE failing: the failure raises, so
        # anything placed after it never runs.
        print_exc()
        self.fail('NMI calculation raised an exception (map vs map)')
    self.assertTrue(abs(round(nmi, 5) - 1.0492) < 0.001)

    # Now test with a model and map
    p = os.path.join(path_test, '1J6Z.pdb')
    m = os.path.join(path_test, 'emd_5168_monomer.mrc')
    res = 6.6
    Name1 = os.path.basename(m).split('.')[0]
    Name2 = os.path.basename(p).split('.')[0]
    emmap1 = MapParser.readMRC(m)
    structure_instance = PDBParser.read_PDB_file(Name2,
                                                 p,
                                                 hetatm=False,
                                                 water=False)
    blurrer = StructureBlurrer()
    emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1)
    c1 = 9.7
    c2 = 1.0

    nmi = 0
    try:
        nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
        if nmi < 0.0:
            nmi = 0.0
    except Exception:
        print_exc()
        self.fail('NMI calculation raised an exception (map vs model)')
    self.assertTrue(abs(round(nmi, 5) - 1.0575) < 0.001)
#import numpy as np import os from TEMPy.class_arg import TempyParser tp = TempyParser() tp.generate_args() if not tp.args.inp_map is None: m1 = tp.args.inp_map elif not tp.args.inp_map1 is None: m1 = tp.args.inp_map1 else: sys.exit('Input map missing') print 'reading map' m1Name = os.path.basename(m1).split('.')[0] emmap_1 = MapParser.readMRC(m1) if not tp.args.thr is None: c1 = tp.args.thr elif not tp.args.thr1 is None: c1 = tp.args.thr1 else: print 'calculating contour' zeropeak, ave, sigma1 = emmap_1._peak_density() if not zeropeak is None: c1 = zeropeak + (1.5 * sigma1) else: sys.exit('Contour level required') level = c1 sigma = emmap_1.fullMap.std() sigma = abs(sigma) try: emmap_1.fullMap = emmap_1._label_patches(level - 0.02 * sigma)[0]
from TEMPy.StructureParser import PDBParser from TEMPy.MapParser import MapParser from TEMPy.StructureBlurrer import StructureBlurrer import numpy as np import sys # define point for rotation # tempy examples use COM from input structure # rotating against 0 0 0 doesn't seem to work import TEMPy.Vector as Vector com = Vector.Vector(90, 90, 90) # read in map target_map = MapParser.readMRC(sys.argv[1]) #read target map # read in structure structure_instance = PDBParser.read_PDB_file('structure_id', sys.argv[2]) # translate along x, y, z structure_instance.translate(42, 58, -5) # rotate along x, y, z structure_instance.rotate_by_axis_angle(0, 0, 1, np.rad2deg(-2.125868534775962), com=com) structure_instance.rotate_by_axis_angle(0, 1, 0, np.rad2deg(-0.0005038746980934731),