def computeCc(process_n): #compute nlm_cc print "\nRun task cc pid%s" % (os.getpid()), time.ctime() mov_nlm_array = mov_model.nlm_array fix_nlm_array = math.nlm_array(nmax) nlm = fix_nlm_array.nlm() nlm_total = fix_nlm_array.nlm().size() nlmRes = [] start_line = process_n * perSizeCc if (process_n + 1) * perSizeCc > nlNum: end_line = nlNum else: end_line = (process_n + 1) * perSizeCc print "cc_end_line:", end_line for i in range(start_line, end_line): indx = sortedNlRes[i][0] fix = nlm_coefs[indx][0:nlm_total] fix_nlm_array.load_coefs(nlm, fix) align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax, refine=True) cc = align_obj.get_cc() print indx, codes[indx], cc nlmRes.append((indx, codes[indx], cc)) # print "thread %d finished at"%(process_n), time.ctime() print "len------", len(nlmRes) return nlmRes
def pair_align(self, nlm_coefs, calc_cc=True):
    """Build the pairwise score matrix of the top models and cluster them.

    ``self.cc_array`` becomes a list of ``self.ntop`` ``flex.double`` rows,
    initialised to 1.  When ``nlm_coefs`` is given and ``calc_cc`` is true,
    every pair of top models is aligned with ``fft_align.align`` and the
    best score is normalised with the per-model mean/sigma returned by
    ``get_mean_sigma``.  The matrix is written to ``<prefix>.cc``, clustered
    with ``hcluster.hcluster(..., 0.8)`` and the tree is written to
    ``<prefix>.tree``.  The clustering result is kept in ``self.clusters``.

    Args:
        nlm_coefs: indexable collection of coefficient arrays, indexed by the
            entries of ``self.best_models``; may be ``None``.
        calc_cc: when false the alignment step is skipped and the matrix
            keeps its initial value of 1 everywhere.
    """
    self.cc_array = []
    for ii in range(self.ntop):
        self.cc_array.append(flex.double(self.ntop, 1))

    # Bound before the branch so the header line below is always defined;
    # previously it only existed when the alignment branch ran.
    comment = "# Correlation Coefficient <rho_1(r)*rho_2(r)>"
    if (nlm_coefs is not None and calc_cc):
        fix_nlm_array = math.nlm_array(self.nmax)
        mov_nlm_array = math.nlm_array(self.nmax)
        nlm = fix_nlm_array.nlm()
        nlm_total = fix_nlm_array.coefs().size()
        top_coefs = []
        mean = flex.double()
        sig = flex.double()
        # First pass: cache the truncated coefficients and their statistics.
        for ii in range(self.ntop):
            ff = self.best_models[ii]
            fix = nlm_coefs[ff][0:nlm_total]
            top_coefs.append(fix)
            fix_nlm_array.load_coefs(nlm, fix)
            m, s = get_mean_sigma(fix_nlm_array)
            mean.append(m)
            sig.append(s)
        # Second pass: fill the lower triangle and mirror it.
        for ii in range(self.ntop):
            fix = top_coefs[ii]
            fix_nlm_array.load_coefs(nlm, fix)
            for jj in range(ii):
                mov = top_coefs[jj]
                mov_nlm_array.load_coefs(nlm, mov)
                cc = fft_align.align(fix_nlm_array, mov_nlm_array,
                                     nmax=self.nmax, refine=True).best_score
                cc = (cc - mean[ii] * mean[jj]) / (sig[ii] * sig[jj])
                self.cc_array[ii][jj] = cc
                self.cc_array[jj][ii] = cc

    outfile = self.prefix + ".cc"
    # 'with' guarantees the file is closed even if formatting fails.
    with open(outfile, 'w') as out:
        print >> out, comment
        for ii in range(1, self.ntop + 1):
            print >> out, "%6d" % ii,
        print >> out, " average"
        for ii in range(self.ntop):
            for jj in range(self.ntop):
                print >> out, "%6.3f" % self.cc_array[ii][jj],
            print >> out, "%6.3f" % flex.mean(self.cc_array[ii])

    clusters = hcluster.hcluster(self.cc_array, 0.8)
    clusters.print_hclust()
    tree_dot_file = self.prefix + ".tree"
    clusters.print_neato(tree_dot_file)
    # generate image using neato
    # run_command('/sw/bin/neato -Tpng -o cluster.png '+tree_dot_file )
    self.clusters = clusters
    return
def process(pdb_files, nmax, rmax=50.0, fraction=0.9):
    """Load PDB files and align every model to the first one.

    Each file is wrapped in a ``model_interface.container``.  The first model
    becomes the reference and is written out with a zero rotation; every
    later model is aligned to it with ``fft_align.align`` and written out
    with the rotation ``best_ea`` found by the alignment.  Output files are
    named ``<model.id>_sa.pdb``.

    Args:
        pdb_files: iterable of PDB file names.
        nmax: expansion order handed to the container and the aligner.
        rmax: radius handed to the container.
        fraction: ``rmax / fraction`` is the radius used when writing PDBs.

    Returns:
        list of ``pdb_model`` objects, one per processed file.
    """
    aligned_models = []
    padded_rmax = rmax / fraction
    reference = None
    for path in pdb_files:
        model = model_interface.container(pdbfile=path, rmax=rmax, nmax=nmax)
        if model is None:
            continue
        if not aligned_models:
            # First usable model defines the reference frame.
            reference = model.nlm_array
            coefs = reference.coefs()
            rotation = (0, 0, 0)
        else:
            fit = fft_align.align(reference, model.nlm_array,
                                  nmax=nmax, refine=True)
            coefs = fit.moving_nlm.coefs()
            rotation = fit.best_ea
        aligned_models.append(pdb_model(coefs, path, model.rmax, model))
        model.write_pdb(rmax=padded_rmax, rotation=rotation,
                        filename=model.id + '_sa.pdb')
    return aligned_models
def process(pdb_files, nmax, rmax=50.0, fraction=0.9):
    """Compute Zernike moments for PDB files and align them to the first.

    Moments come from ``pdb2zernike.zernike_moments``.  When the radius
    reported by the voxel object is smaller than ``rmax`` the moments are
    recomputed with ``external_rmax=rmax``; the radius stored in the
    resulting ``pdb_model`` is still the one from the first computation.
    The first model is the reference; later ones are aligned to it with
    ``fft_align.align`` and written through ``write_pdb``.

    Args:
        pdb_files: iterable of PDB file names.
        nmax: expansion order.
        rmax: minimum radius to use for the moments.
        fraction: ``rmax / fraction`` is used for each component of the
            shift passed to ``write_pdb``.

    Returns:
        list of ``pdb_model`` objects, one per processed file.
    """
    padded = rmax / fraction
    shift = (padded, padded, padded)
    moment_options = dict(nmax=nmax, coef_out=False, calc_intensity=False)

    models = []
    reference = None
    for path in pdb_files:
        mom_obj, vox_obj, ipdb = pdb2zernike.zernike_moments(
            path, **moment_options)
        pdb_rmax = vox_obj.rmax()
        if pdb_rmax < rmax:
            # Molecule is smaller than the requested radius: redo with rmax.
            mom_obj, vox_obj, ipdb = pdb2zernike.zernike_moments(
                path, external_rmax=rmax, **moment_options)
        if mom_obj is None:
            continue
        if not models:
            reference = mom_obj.moments()
            coefs = mom_obj.moments().coefs()
            rotation = (0, 0, 0)
        else:
            fit = fft_align.align(reference, mom_obj.moments(),
                                  nmax=nmax, refine=True)
            coefs = fit.moving_nlm.coefs()
            rotation = fit.best_ea
        models.append(pdb_model(coefs, path, pdb_rmax, ipdb))
        write_pdb(path, vox_obj, rotation, shift, ipdb)
    return models
def pair_align(self):
    """Compute and write the pairwise correlation matrix of all trials.

    For every coefficient set in ``self.finals`` a mean (``abs`` of the
    first coefficient) and a sigma (``sqrt(sum(norm) - mean**2)``) are
    derived.  Each pair of trials is aligned with ``fft_align.align`` and
    the best score is normalised with those statistics.  The symmetric
    ``n_trial x n_trial`` matrix is stored in ``self.cc_array`` (diagonal
    left at 1.0) and written to ``<prefix>pair.cc``.
    """
    ms = flex.double()
    ss = flex.double()
    tmp_nlm_array = math.nlm_array(self.nmax)
    for coef in self.finals:
        mean = abs(coef[0])
        var = flex.sum(flex.norm(coef))
        sigma = smath.sqrt(var - mean * mean)
        ms.append(mean)
        ss.append(sigma)

    grids = flex.grid(self.n_trial, self.n_trial)
    self.cc_array = flex.double(grids, 1.0)
    for ii in range(self.n_trial):
        self.nlm_array.load_coefs(self.nlm, self.finals[ii])
        # Only the lower triangle is computed; the result is mirrored.
        for jj in range(ii):
            tmp_nlm_array.load_coefs(self.nlm, self.finals[jj])
            cc = fft_align.align(self.nlm_array, tmp_nlm_array,
                                 nmax=self.nmax, refine=True).best_score
            cc = (cc - ms[ii] * ms[jj]) / (ss[ii] * ss[jj])
            self.cc_array[(ii, jj)] = cc
            self.cc_array[(jj, ii)] = cc

    outfile = self.prefix + "pair.cc"
    comment = "# electron density correlation coefficient, < rho_1(r)*rho_2(r) >"
    # 'with' closes the file even when a write raises.
    with open(outfile, 'w') as out:
        print >> out, comment
        for ii in range(1, self.n_trial + 1):
            print >> out, "%6d" % ii,
        print >> out, " average"
        for ii in range(self.n_trial):
            for jj in range(self.n_trial):
                print >> out, "%6.3f" % self.cc_array[(ii, jj)],
            # Row mean, taken from the flat slice that holds row ii.
            print >> out, flex.mean(
                self.cc_array[ii * self.n_trial:(ii + 1) * self.n_trial])
def computeCc(process_n, perSizeLeft, perSizeRight, perSize, listlength, filelist, nmax, rmax): #compute nlm_cc ccArr = [] start_line_left = process_n*perSizeLeft end_line_left = (process_n + 1)*perSizeLeft start_line_right = listlength - (process_n+1)*perSizeRight end_line_right = listlength - process_n*perSizeRight if start_line_right - end_line_left < 0: #arrive the stop point, the interval is the last difference of start_line_right and end_line_left middle = start_line_right - end_line_left + perSize end_line_left = end_line_left - perSizeLeft + int(xmath.ceil(middle / 2)) start_line_right = end_line_left print "process_n, start_left, end_left, start_right, end_right:",process_n, start_line_left, end_line_left, start_line_right, end_line_right for i in (range(start_line_left,end_line_left) + range(start_line_right,end_line_right)): for j in range(i+1,listlength): fix_model = model_interface.build_model( filelist[i], "pdb", nmax, rmax ) mov_model = model_interface.build_model( filelist[j], "pdb", nmax, rmax ) fix_nlm_array = fix_model.nlm_array mov_nlm_array = mov_model.nlm_array align_obj = fft_align.align( fix_nlm_array, mov_nlm_array, nmax=nmax,refine=True) cc = align_obj.get_cc() ccArr.append((i, j, cc)) return ccArr
def build_starting_model(self): grids = flex.grid([self.np_on_grid * 2 + 1] * 3) self.start_model = flex.double(grids, 0.0) #### BUILD STARTING MODEL ##### self.ngp = self.start_model.size() for ii in self.indx_list: self.start_model[ii] = 1.0 self.molecule = self.indx_list self.raw_map = self.start_model.deep_copy() # a sphere print self.rmax, "RMAX" self.working_model = self.start_model.deep_copy( ) # Make a working model self.best_model = self.start_model.deep_copy() # Make a starting model #### Reusable Objects for moments calculation #### self.grid_obj = math.sphere_grid(self.np_on_grid, self.nmax) self.grid_obj.construct_space_sum_via_list(self.molecule) self.moment_obj = math.zernike_moments(self.grid_obj, self.nmax) moments = self.moment_obj.moments() self.zm = zm.zernike_model(moments, self.data.q, self.rmax, self.nmax) nn = self.moment_obj.fnn() self.calc_i = self.zm.calc_intensity(nn) self.calc_i = self.calc_i / self.calc_i[0] self.start_i = self.calc_i.deep_copy() self.start_nlm_coefs = moments.coefs().deep_copy() self.best_nlm_coefs = self.start_nlm_coefs.deep_copy() self.start_score = ((self.calc_i - self.data.i) / self.data.s).norm() out = open(self.prefix + 'start.iq', 'w') for qq, ic, io in zip(self.data.q, self.calc_i * self.scale_2_expt, self.data.i * self.scale_2_expt): print >> out, qq, ic, io out.close() self.nlm_array.load_coefs(self.nlm, self.start_nlm_coefs) self.start_m, self.start_s = get_mean_sigma(self.nlm_array) if (self.pdb_nlm is not None): self.pdb_m, self.pdb_s = get_mean_sigma(self.pdb_nlm) align_obj = fft_align.align(self.pdb_nlm, self.nlm_array, nmax=self.nmax, refine=True) cc = align_obj.best_score cc = (cc - self.start_m * self.pdb_m) / (self.start_s * self.pdb_s) print "C.C. (PDB, Start) = %8.5f, Score = %8.5f" % ( cc, self.start_score) xplor_map_type(self.raw_map, self.np_on_grid, self.rmax, file_name=self.prefix + 'start.xplor') print "Fraction: ", flex.sum(self.start_model) / (self.np_on_grid** 3.0) / 8.0
def build_starting_model(self): grids = flex.grid([self.np_on_grid*2+1]*3) self.start_model = flex.double(grids,0.0) #### BUILD STARTING MODEL ##### max_map = flex.max( self.raw_map ) self.molecule = flex.int() self.mod_list_1d = flex.int() self.mod_list = flex.vec3_double() cutoff = self.nmodel/2.0 distance_indx = flex.vec3_double() for ii in range(self.ngp ): if(self.raw_map[ii] >= cutoff): self.start_model[ii]=1.0 self.molecule.append( ii ) ## Need to be moved to mark region distance_indx.append( self.convert_indx_1_3( ii ) ) center_indx = (self.np_on_grid, self.np_on_grid, self.np_on_grid) distance = (distance_indx - center_indx).norms() max_dist = flex.max( distance ) / self.np_on_grid self.rmax = self.rmax*self.fraction/max_dist self.working_model= self.start_model.deep_copy() # Make a working model self.best_model = self.start_model.deep_copy() # Make a starting model #### Reusable Objects for moments calculation #### self.grid_obj=math.sphere_grid(self.np_on_grid, self.nmax) self.grid_obj.construct_space_sum_via_list( self.molecule, self.start_model.as_1d() ) self.moment_obj = math.zernike_moments( self.grid_obj, self.nmax ) moments = self.moment_obj.moments() self.zm = zm.zernike_model( moments, self.data.q, self.rmax, self.nmax) nn = self.moment_obj.fnn() self.calc_i = self.zm.calc_intensity(nn) self.calc_i = self.calc_i / self.calc_i[0] self.start_i = self.calc_i.deep_copy() self.start_nlm_coefs = moments.coefs().deep_copy() self.best_nlm_coefs = self.start_nlm_coefs.deep_copy() self.start_score= ( (self.calc_i-self.data.i)/self.data.s ).norm() out = open(self.prefix+'start.iq', 'w') for qq,ic,io in zip( self.data.q, self.calc_i*self.scale_2_expt, self.data.i*self.scale_2_expt): print>>out, qq, ic, io out.close() self.nlm_array.load_coefs( self.nlm, self.start_nlm_coefs ) self.start_m, self.start_s = get_mean_sigma( self.nlm_array ) if (self.pdb_nlm is not None): self.pdb_m, self.pdb_s = get_mean_sigma( self.pdb_nlm ) align_obj = 
fft_align.align(self.pdb_nlm, self.nlm_array, nmax=self.nmax, refine=True) cc = align_obj.best_score cc = ( cc - self.start_m*self.pdb_m ) / ( self.start_s*self.pdb_s ) print "C.C. (PDB, Start) = %8.5f, Score = %8.5f"%(cc, self.start_score) xplor_map_type( self.raw_map, self.np_on_grid, self.rmax, file_name=self.prefix+'start.xplor') print "Fraction: ", flex.sum(self.start_model)/(self.np_on_grid**3.0)/8.0
def refine(self, trial): print "--------------Trial %d-----------------"%trial, time.ctime() self.working_model = self.start_model.deep_copy() self.nlm_coefs = self.start_nlm_coefs.deep_copy() self.best_nlm_coefs = self.start_nlm_coefs.deep_copy() self.best_blq = self.start_blq.deep_copy() self.lowest_score = self.start_score init_scores = flex.double() while( init_scores.size() < 10 ): if(self.modify()): init_scores.append( self.target() ) mean = flex.mean( init_scores ) self.deltaS = smath.sqrt( flex.sum(flex.pow2(init_scores-mean) )/init_scores.size() ) self.T = self.deltaS * 20 self.nsteps = 500 self.score = mean self.working_model = self.start_model.deep_copy() while( True ): #self.T > self.deltaS/10.0): self.n_reject_this_round = 0 self.n_accept_this_round = 0 for ii in range( self.nsteps ): self.move() self.n_moves_this_round = self.n_reject_this_round + self.n_accept_this_round print "Number of moves: %d(out of %d)"%(self.n_moves_this_round, self.nsteps) print "Number of Accept/Reject: %d/%d"%(self.n_accept_this_round, self.n_reject_this_round) if( self.n_reject_this_round >= self.n_moves_this_round*0.9 ): print "Too Many rejections (%d), quit at temperature (%f)"%(self.n_reject_this_round, self.T) break self.T = self.T*0.9 self.nlm_array.load_coefs( self.nlm, self.best_nlm_coefs ) best_blq = self.blq_calculator.get_all_blq( self.nlm_array ) best_blq = best_blq/best_blq[0] out = open(self.prefix+str(trial)+'_final.blq', 'w') self.data.print_out(data=best_blq,out=out) out.close() print "total number of moves %d"%self.counter print "total number of accepted moves %d"%self.n_accept if (self.pdb_nlm is not None): align_obj = fft_align.align(self.pdb_nlm, self.nlm_array, nmax=self.nmax, refine=True) mean = abs( self.best_nlm_coefs[0] ) var = flex.sum( flex.norm( self.best_nlm_coefs ) ) sigma = smath.sqrt( var - mean*mean ) cc = align_obj.best_score cc = ( cc - mean*self.pdb_m ) / ( sigma*self.pdb_s ) print "C.C. 
(PDB, trial%6d) = %8.5f, Score = %8.5f"%(trial, cc, self.lowest_score) self.best_nlm_coefs = align_obj.moving_nlm.coefs() reconst_model = self.reconst_model( self.best_nlm_coefs ) xplor_map_type( reconst_model, self.np_on_grid, self.rmax, file_name=self.prefix+str(trial)+'_final_rbt.xplor') xplor_map_type( self.best_model, self.np_on_grid, self.rmax, file_name=self.prefix+str(trial)+'_final.xplor') print "-----------End of Trial %d--------------"%trial, time.ctime()
def tst_moments(nmax,np):
    """Regression test for ``math.zernike_moments`` on the ``tst_grid`` grid.

    Compares the Fnl, Fnn and Fnnl coefficients and a handful of individual
    moments with stored reference values (absolute tolerance 1e-8), then
    checks that aligning the moments with themselves gives a correlation of
    1 within 1e-3.

    Returns:
        True when every assertion holds.
    """
    grid_obj = tst_grid(nmax,np)
    mom_obj = math.zernike_moments(grid_obj, nmax)
    moments = mom_obj.moments()
    fnl = mom_obj.fnl()
    fnn = mom_obj.fnn()
    fnnl = mom_obj.fnnl()
    tolerance = 1e-8

    expected_nl = [
        2.3370760050376607e-05, 2.1910087547227873e-07,
        1.1145167882067956e-05, 1.1552795255443502e-05,
        1.8332528001413832e-07, 2.7291537694166531e-07,
        7.2474025554586763e-06, 2.3917327527551719e-05,
        4.2203807328102694e-05,
    ]
    expected_nn = [
        1.1685380025188304e-05, 1.0955043773613935e-07,
        1.6139115350383189e-05, 1.134898156875573e-05,
        1.7412731546555787e-08, 2.2812032847790184e-07,
        1.3014503682895901e-05, -4.7928189112959946e-06,
        3.668426870555654e-05,
    ]
    expected_nnl = [
        (1.1685380025188304e-05+0j), 0j, 0j, (1.0955043773613935e-07+0j),
        (-1.6139115350383189e-05+0j), 0j, 0j, (5.5725839410339778e-06+0j),
        0j, (5.7763976277217503e-06+0j), 0j, 0j,
        (-1.7412731546555787e-08+0j), 0j, 0j, 0j, 0j,
        (9.1662640007069148e-08+0j), 0j, (1.3645768847083266e-07+0j),
        (1.3014503682895901e-05+0j), 0j, 0j, (-8.9874088696083742e-06+0j),
        0j, (1.3780227780904368e-05+0j), 0j, 0j, 0j, 0j,
        (3.6237012777293381e-06+0j), 0j, (1.1958663763775861e-05+0j), 0j,
        (2.1101903664051344e-05+0j),
    ]
    # Same three comparisons as before, driven from one table.
    for reference, computed in ((expected_nl, fnl),
                                (expected_nn, fnn),
                                (expected_nnl, fnnl)):
        for i_pre, i_cal in zip(reference, computed.coefs()):
            assert abs(i_pre-i_cal) < tolerance

    # ((n, l, m), expected moment) pairs.
    expected_moments = (
        ((0, 0, 0), (0.00483433139642-0j)),
        ((1, 1, 0), (0.000270247340704-0j)),
        ((1, 1, 1), (-0.000191093727209+0.000191093727209j)),
        ((2, 0, 0), (-0.00333843794042-0j)),
        ((2, 2, 1), (-1.26396619825e-05+1.26396619825e-05j)),
        ((4, 2, 0), (-0.00371195771764-0j)),
        ((4, 4, 0), (0.00317650549416-0j)),
    )
    for (n, l, m), reference in expected_moments:
        assert abs( moments.get_coef(n, l, m) - reference ) < tolerance

    # do some alignment please
    from scitbx.math import zernike_align_fft as zafft
    fixed = mom_obj.moments()
    moving = mom_obj.moments()
    al_obj = zafft.align( fixed, moving)
    assert abs(al_obj.get_cc()-1) < 1e-3
    return True
def calcc(modelId): nlm_array_mov = math.nlm_array(nmax) nlm = nlm_array_ref.nlm() nlm_array_mov.load_coefs(nlm, coefs[modelId][0:nlm_total]) align_obj = fft_align.align(nlm_array_ref, nlm_array_mov, nmax=nmax, refine=True) cc = align_obj.get_cc() print "c.c. between ", os.path.split( pdbfile)[-1], "and ", codes[modelId], "is ", cc return cc
def computeCc(filelist, nmax, rmax):
    """Align the first two PDB files of ``filelist`` and return their CC.

    Both files are turned into models with ``model_interface.build_model``
    (type ``"pdb"``); the second is aligned onto the first.  The value is
    also printed together with ``nmax``.

    Args:
        filelist: sequence whose first two entries are PDB file names.
        nmax, rmax: forwarded to ``build_model``; ``nmax`` also to the aligner.
    """
    def build(path):
        return model_interface.build_model(path, "pdb", nmax, rmax)

    fixed_model = build(filelist[0])
    moving_model = build(filelist[1])
    alignment = fft_align.align(fixed_model.nlm_array, moving_model.nlm_array,
                                nmax=nmax, refine=True)
    cc = alignment.get_cc()
    message = "when nmax:%s----------- cc:%s" % (nmax, cc)
    print(message)
    return cc
def refine(self, trial): print "--------------Trial %d-----------------"%trial, time.ctime() self.working_model = self.start_model.deep_copy() self.nlm_coefs = self.start_nlm_coefs.deep_copy() self.best_nlm_coefs = self.start_nlm_coefs.deep_copy() self.best_i = self.start_i.deep_copy() self.lowest_score = self.start_score init_scores = flex.double() for ii in range(10): self.modify() init_scores.append( self.target() ) mean = flex.mean( init_scores ) self.deltaS = smath.sqrt( flex.sum(flex.pow2(init_scores-mean) )/10.0 ) self.T = self.deltaS * 100 self.nsteps = 200 self.score = mean self.working_model = self.start_model.deep_copy() while( self.T > self.deltaS/2.0): self.n_reject = 0 for ii in range( self.nsteps ): self.move() print "Number of Accept/Reject: %d/%d"%(self.nsteps-self.n_reject, self.n_reject) if( self.n_reject > self.nsteps*0.9 ): print "Too Many rejections (%d), quit at temperature (%f)"%(self.n_reject, self.T) break self.T = self.T*0.9 out = open(self.prefix+str(trial)+'_final.iq', 'w') self.nlm_array.load_coefs( self.nlm, self.best_nlm_coefs ) best_i = self.zm.calc_intensity_nlm( self.nlm_array ) best_i = best_i/best_i[0]*self.scale_2_expt for qq,ic,io in zip( self.data.q, best_i, self.data.i*self.scale_2_expt): print>>out, qq, ic, io out.close() print "total number of moves %d"%self.counter print "total number of accepted moves %d"%self.n_accept if (self.pdb_nlm is not None): align_obj = fft_align.align(self.pdb_nlm, self.nlm_array, nmax=self.nmax, refine=True) mean = abs( self.best_nlm_coefs[0] ) var = flex.sum( flex.norm( self.best_nlm_coefs ) ) sigma = smath.sqrt( var - mean*mean ) cc = align_obj.best_score cc = ( cc - mean*self.pdb_m ) / ( sigma*self.pdb_s ) print "C.C. 
(PDB, trial%6d) = %8.5f, Score = %8.5f"%(trial, cc, self.lowest_score) self.best_nlm_coefs = align_obj.moving_nlm.coefs() reconst_model = self.reconst_model( self.best_nlm_coefs ) xplor_map_type( reconst_model, self.np_on_grid, self.rmax, file_name=self.prefix+str(trial)+'_final_rbt.xplor') xplor_map_type( self.best_model, self.np_on_grid, self.rmax, file_name=self.prefix+str(trial)+'_final.xplor') print "-----------End of Trial %d--------------"%trial, time.ctime()
def computeCc(process_total, process_n, perSizeLeft, perSizeRight, perSize,
              listlength, filelist, nmax, rmax):
    """Compute pairwise CCs for this worker's share of the coefficient list.

    The worker takes one index range from the left end and one from the
    right end.  When the two ranges would overlap, the left range is
    extended to the meeting point and the right range starts there.  When a
    gap is left and ``process_n == process_total``, the left range is
    extended up to the right range.  For every index ``i`` in those ranges,
    ``filelist[i]`` is aligned against every ``filelist[j]`` with ``j > i``.

    Args:
        process_total: value of ``process_n`` for which the remaining gap
            is absorbed.
        process_n: worker number.
        perSizeLeft, perSizeRight: indices per worker on each side.
        perSize: added to the gap when the two ranges overlap.
        listlength: number of entries of ``filelist`` to consider.
        filelist: coefficient arrays, passed to ``nlm_array.load_coefs``.
        nmax: expansion order.
        rmax: not used by this function.

    Returns:
        list of ``(i, j, cc)`` tuples.
    """
    ccArr = []
    start_line_left = process_n * perSizeLeft
    end_line_left = (process_n + 1) * perSizeLeft
    start_line_right = listlength - (process_n + 1) * perSizeRight
    end_line_right = listlength - process_n * perSizeRight
    if start_line_right - end_line_left < 0:
        # arrive the stop point, the interval is the last difference of
        # start_line_right and end_line_left
        middle = start_line_right - end_line_left + perSize
        end_line_left = end_line_left - perSizeLeft + int(
            xmath.ceil(middle / 2))
        start_line_right = end_line_left
    # if stop before left meet right
    if ((start_line_right - end_line_left > 0)
            and (process_n == process_total)):
        end_line_left = start_line_right
    print("process_n, start_left, end_left, start_right, end_right:",
          process_n, start_line_left, end_line_left, start_line_right,
          end_line_right)
    for i in (range(start_line_left, end_line_left) +
              range(start_line_right, end_line_right)):
        if i + 1 >= listlength:
            # No partner with a larger index; nothing to allocate or align.
            continue
        # The fixed array depends only on i: allocate and load it once per
        # row instead of once per (i, j) pair.
        fix_nlm_array = nlm_array(nmax)
        fix_nlm_array.load_coefs(fix_nlm_array.nlm(), filelist[i])
        for j in range(i + 1, listlength):
            mov_nlm_array = nlm_array(nmax)
            mov_nlm_array.load_coefs(mov_nlm_array.nlm(), filelist[j])
            align_obj = fft_align.align(fix_nlm_array, mov_nlm_array,
                                        nmax=nmax, refine=True)
            cc = align_obj.get_cc()
            ccArr.append((i, j, cc))
            print(i, j, cc)
    return ccArr
def build_starting_model(self):
    """Create the starting model from ``self.indx_list`` and score its blq.

    Grid points listed in ``self.indx_list`` are set to 1.0 in a cubic grid
    of edge ``2 * np_on_grid + 1``.  Zernike moments are computed for that
    model, ``fxs_tools.znk_blq`` turns them into blq values which are
    normalised to their first entry, and ``self.start_score`` is the norm
    of the sigma-weighted difference to ``self.data.blq``.  When
    ``self.pdb_nlm`` is set, the normalised correlation with it is printed.
    The map is written to ``<prefix>start.xplor``.
    """
    grids = flex.grid([self.np_on_grid*2+1]*3)
    self.start_model = flex.double(grids,0.0)
    #### BUILD STARTING MODEL #####
    self.ngp = self.start_model.size()
    for ii in self.indx_list:
        self.start_model[ii]=1.0
    self.molecule = self.indx_list
    self.raw_map = self.start_model.deep_copy() # a sphere
    print self.rmax
    #### Reusable Objects for moments calculation ####
    self.grid_obj=math.sphere_grid(self.np_on_grid, self.nmax)
    self.grid_obj.construct_space_sum_via_list( self.molecule ) #, self.start_model.as_1d() )
    self.moment_obj = math.zernike_moments( self.grid_obj, self.nmax )
    self.moments = self.moment_obj.moments()
    self.blq_calculator = fxs_tools.znk_blq(self.moments,self.data.q,self.rmax,self.nmax,self.lmax)
    self.calc_blq = self.blq_calculator.get_all_blq2()
    print self.calc_blq[0], "self.calc_blq[0]"
    self.calc_blq = self.calc_blq / self.calc_blq[0] # normalized to b00 (largest, probably)
    # Keep independent copies so later refinement cannot alter the start state.
    self.start_blq = self.calc_blq.deep_copy()
    self.start_nlm_coefs = self.moments.coefs().deep_copy()
    self.best_nlm_coefs = self.start_nlm_coefs.deep_copy()
    # Weights: sqrt(|blq|); the 1e-10 keeps the divisor below non-zero.
    self.sigma = flex.sqrt( flex.abs(self.data.blq) + 1e-10)
    #self.sigma=self.data.blq+1e-10
    self.start_score= ( (self.calc_blq-self.data.blq)/self.sigma).norm()
    print "---- Starting Score ---- %f"%self.start_score
    self.nlm_array.load_coefs( self.nlm, self.start_nlm_coefs )
    self.start_m, self.start_s = get_mean_sigma( self.nlm_array )
    if (self.pdb_nlm is not None):
        # Reference model available: report the normalised correlation.
        self.pdb_m, self.pdb_s = get_mean_sigma( self.pdb_nlm )
        align_obj = fft_align.align(self.pdb_nlm, self.nlm_array, nmax=self.nmax, refine=True)
        cc = align_obj.best_score
        cc = ( cc - self.start_m*self.pdb_m ) / ( self.start_s*self.pdb_s )
        print "C.C. (PDB, Start) = %8.5f, Score = %8.5f"%(cc, self.start_score)
    xplor_map_type( self.raw_map, self.np_on_grid, self.rmax, file_name=self.prefix+'start.xplor')
    # Occupied grid points relative to (2*np_on_grid)**3.
    print "Fraction: ", flex.sum(self.start_model)/(self.np_on_grid**3.0)/8.0
def computeCc(thread_n, arr_cc): #compute nlm_cc print"----------thread_cc----------",thread_n mov_nlm_array = mov_model.nlm_array fix_nlm_array = math.nlm_array(nmax) nlm = fix_nlm_array.nlm() nlm_total = fix_nlm_array.nlm().size() nlmRes = [] if ((thread_n+1)*perSizeCc < nlNum): # range(x,y) => [x,y-1] end_line = (thread_n+1)*perSizeCc else: end_line = nlNum for i in range(start_line, end_line): indx = sortedNlRes[i][0] fix = nlm_coefs[indx][0:nlm_total] fix_nlm_array.load_coefs(nlm, fix) align_obj = fft_align.align( fix_nlm_array, mov_nlm_array, nmax=nmax, refine=True ) cc = align_obj.get_cc() nlmRes.append((indx, codes[indx], cc)) arr_cc.extend(nlmRes)
def run(args):
    """Superpose two models and write the aligned results.

    Parameters are parsed with ``get_input`` (scope ``align``).  Both
    inputs are built with ``model_interface.build_model``, the moving model
    is aligned to the fixed one, and both models get the common radius
    returned by ``update_rmax``.  For each model the coefficients are
    pickled to ``<base>_za.nlm``; optionally a map ``<base>_za.xplor`` is
    written, and when the model has a ``vox_obj`` an aligned PDB file
    ``<base>_za.pdb`` is written.  ``<base>`` is placed in the
    ``superpose`` sub-directory of the directory named in the GUI's
    ``tmp.txt`` file.

    Args:
        args: command-line style arguments forwarded to ``get_input``.

    Returns:
        None.  Returns early when ``get_input`` yields no parameters.
    """
    params = get_input(args, master_params, "align", banner, help)
    if params is None:
        return
    fix = params.align.fix
    typef = params.align.typef
    mov = params.align.mov
    typem = params.align.typem
    num_grid = params.align.num_grid
    nmax = params.align.nmax
    rmax = params.align.rmax
    topn = params.align.topn
    write_map = params.align.write_map
    fix_model=model_interface.build_model( fix, typef, nmax, rmax )
    mov_model=model_interface.build_model( mov, typem, nmax, rmax )
    fix_nlm_array = fix_model.nlm_array
    mov_nlm_array = mov_model.nlm_array
    print "doing alignment"
    align_obj = fft_align.align( fix_nlm_array, mov_nlm_array, nmax=nmax, topn=topn ,refine=True)
    cc = align_obj.get_cc()
    # From here on the moving model carries the aligned coefficients.
    mov_model.nlm_array = align_obj.moving_nlm
    rmax = update_rmax( rmax, fix_model, mov_model)
    fix_model.rmax = rmax
    mov_model.rmax = rmax
    shift=(rmax, rmax, rmax)
    print "############# SUMMARY of ALIGNMENT #############"
    print "Correlation Coefficient Between two models is: ", cc
    print "Rmax is : ", rmax
    print "Center of Mass is shifted to : ", list(shift)
    print "OUTPUT files are : "
    # False while the fixed model is processed, True for the moving one.
    current_is_mov = False
    for model in (fix_model, mov_model):
        #base=model.id
        ################## 20170520: change the output dir ##################
        base = str(model.id.split("/")[-1])
        # The output directory is read from a file under the sastbx tree,
        # located relative to the path of the running script.
        dirlist = sys.argv[0].split("sastbx")
        tmpdir = str(dirlist[0])+"sastbx/gui/sasqt/tmp.txt"
        with open(tmpdir,"r") as f:
            targetdir = str(f.read().strip())
        base = os.path.join(targetdir,"superpose",base)
        ###############################################################
        easy_pickle.dump(base+"_za.nlm", model.nlm_array.coefs() )
        print " "+base+"_za.nlm"
        if(write_map):
            model.write_map(filename=base+"_za.xplor")
            print " "+base+"_za.xplor"
        if( model.vox_obj is not None): ### Write aligned PDB file ####
            out_pdb_name=base+"_za.pdb"
            if(current_is_mov):
                # Only the moving model is rotated, using best_ea from the
                # alignment with the first and third components negated.
                ea = align_obj.best_ea
                aligned_xyz = model.vox_obj.rotate((-ea[0],ea[1],-ea[2]), False)
            else:
                aligned_xyz = model.vox_obj.xyz()
            aligned_xyz = aligned_xyz + shift ### Add the shift, such that the EDM center is the same as PDB
            ################### 20170511: debug for size error ###################
            #model.pdb_inp.hierarchy.atoms().set_xyz(aligned_xyz)
            # Coordinates are assigned only to atoms that are not returned
            # by extract_hetero(): start from an "all" selection and clear
            # the hetero positions.
            sel_cache = model.pdb_inp.hierarchy.atom_selection_cache()
            hetero = model.pdb_inp.hierarchy.atoms().extract_hetero()
            position = list(hetero)
            no_hetero = sel_cache.selection("all")
            for i in position:
                no_hetero[i]=False
            no_hetero_atoms = model.pdb_inp.hierarchy.atoms().select(no_hetero)
            no_hetero_atoms.set_xyz(aligned_xyz)
            model.pdb_inp.hierarchy.write_pdb_file( file_name=out_pdb_name, open_append=False)
            print " "+out_pdb_name
        current_is_mov = True
    print "############# END of SUMMARY #############"
def build_map(nmax, rmax, coefs, codes, model_indx, pdb_models, prefix, clusters=None, fract=0.9, type='.ccp4'):
    """Align the selected models, write their maps and the cluster averages.

    The reference is ``pdb_models[0]`` when PDB models are given, otherwise
    the first entry of ``model_indx`` (which is then also written as a map).
    Every remaining model is aligned to the reference with
    ``fft_align.align`` and written with ``write_map``.  For each node of
    ``clusters`` the aligned coefficients of its members are averaged,
    checked with ``model_consistency.good2n``, aligned to the reference and
    written as ``ave_<n>``.  Progress is printed and appended to the files
    named by the globals ``stdfile`` and ``outfilelog``.

    Args:
        nmax: expansion order.
        rmax: radius; maps are written with ``rmax / fract``.
        coefs: indexable collection of coefficient arrays.
        codes: indexable collection of model codes (same indexing).
        model_indx: indices of the selected models (needs ``.size()``).
        pdb_models: optional list whose first entry is the reference.
        prefix: file-name prefix, or ``None`` for no prefix.
        clusters: object with a ``nodes`` attribute; the default ``None``
            is not handled by the code below.
        fract: divisor applied to ``rmax`` for the map radius.
        type: map file extension appended to output names.

    Returns:
        tuple ``(top_cc, top_ids, map_files, levels, cluster_ids,
        ave_maps, ave_levels, ave_cc)``.
    """
    global stdfile
    global outfilelog
    rmax_over_fraction = rmax / fract
    np_on_grid = 30
    zga = math.zernike_grid(np_on_grid, nmax, False)
    ref_nlm_array = math.nlm_array(nmax)
    mov_nlm_array = math.nlm_array(nmax)
    nlm = ref_nlm_array.nlm()
    nlm_total = ref_nlm_array.coefs().size()
    top_cc = flex.double()
    top_ids = []
    ntop = model_indx.size()
    rank = 0
    ave_c = flex.complex_double(nlm_total, 0)
    aligned_coefs = []
    map_files = []
    levels = []
    # Rescaling is only used when the reference PDB model is larger than rmax.
    scale_model = False
    if ((pdb_models is not None) and (pdb_models[0].rmax > rmax)):
        scale_model = True
        external_rmax = pdb_models[0].rmax
        grid = build_3d_grid(np_on_grid, rmax_over_fraction)
    with open(stdfile, "a") as log:
        print >> log, "Rank PDB_code cc (to the given model or the first model):"
    print "Rank PDB_code cc (to the given model or the first model):"
    if (pdb_models is not None):
        # Reference comes from the PDB model; rank stays 0 so that every
        # entry of model_indx is aligned in the loop below.
        ref_nlm_array.load_coefs(nlm, pdb_models[0].nlm_coef)
        fraction = 0
    else:
        # No PDB model: the first selected model is the reference and is
        # written out directly as rank 1.
        c = coefs[model_indx[0]][0:nlm_total]
        ave_c = c.deep_copy()
        ref_nlm_array.load_coefs(nlm, c)
        rank = 1
        if prefix != None:
            filename = prefix + "m" + str(rank) + "_" + codes[model_indx[0]]
            #filename = os.path.join(prefix,"m"+str(rank)+"_"+codes[ model_indx[0] ])
        else:
            filename = "m" + str(rank) + "_" + codes[model_indx[0]]
        map_files.append(filename + type)
        fraction, map = write_map(zga, nlm, c, np_on_grid, rmax_over_fraction, filename, type=type)
        #level = flex.max(map)/3.0
        print "map in search pdb.py: \n"
        print map
        level = map.standard_deviation_of_the_sample()
        print "level in search pdb: ", level
        levels.append(level)
        top_cc.append(1.0)  # refering to itself
        if (scale_model):
            c = get_moments_for_scaled_model(map, np_on_grid, grid, nmax, rmax, external_rmax)
        with open(stdfile, "a") as log:
            print >> log, rank, codes[model_indx[0]]
        print rank, codes[model_indx[0]]
        aligned_coefs.append(c.deep_copy())  # save the aligned nlm coefs
    # Running sums for the mean and spread of the fraction values.
    mean_frac = fraction
    mean_sqr_frac = fraction * fraction
    for ii in model_indx[rank:]:
        rank = rank + 1
        c = coefs[ii][0:nlm_total]
        mov_nlm_array.load_coefs(nlm, c)
        align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
        new_c = align_obj.moving_nlm.coefs()
        if prefix != None:
            #filename = os.path.join(prefix,"m"+str(rank)+"_"+codes[ii])
            filename = prefix + "m" + str(rank) + "_" + codes[ii]
            print "**********************************"
            print "outfilelog: ", outfilelog
            with open(outfilelog, "a") as f:
                print >> f, filename + ".ccp4"
        else:
            filename = "m" + str(rank) + "_" + codes[ii]
            with open(outfilelog, "a") as f:
                print >> f, filename + ".ccp4"
        map_files.append(filename + type)
        fraction, map = write_map(zga, nlm, new_c, np_on_grid, rmax_over_fraction, filename, type=type)
        if (scale_model):
            # Recompute moments for the rescaled map, realign and rewrite.
            c = get_moments_for_scaled_model(map, np_on_grid, grid, nmax, rmax, external_rmax)
            mov_nlm_array.load_coefs(nlm, c)
            align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
            new_c = align_obj.moving_nlm.coefs()
            fraction, map = write_map(zga, nlm, new_c, np_on_grid, rmax_over_fraction, filename, type=type)
        level = flex.max(map) / 3.0
        levels.append(level)
        cc = align_obj.get_cc()
        with open(stdfile, "a") as log:
            print >> log, "%2d %5s %5.3f" % (rank, codes[ii], cc)
        print "%2d %5s %5.3f" % (rank, codes[ii], cc)
        top_cc.append(cc)
        top_ids.append(codes[ii])
        ave_c = ave_c + new_c
        aligned_coefs.append(new_c.deep_copy())  # save the aligned nlm coefs
        mean_frac = mean_frac + fraction
        mean_sqr_frac = mean_sqr_frac + fraction * fraction
    sphere_volume = rmax_over_fraction**3.0 * 8.0  # cube with d=2.0*r
    mean_frac = mean_frac / ntop
    sigma_frac = smath.sqrt(mean_sqr_frac / ntop - mean_frac * mean_frac)
    with open(stdfile, "a") as log:
        print >> log, "Volume is ", mean_frac * sphere_volume, "+/-", sigma_frac * sphere_volume, "(A^3)"
    print "Volume is ", mean_frac * sphere_volume, "+/-", sigma_frac * sphere_volume, "(A^3)"
    #### Write average map ####
    ave_maps = []
    ave_levels = []
    ave_cc = []
    cluster_ids = [1] * len(model_indx)  # initialize cluster_ids
    # filename = "ave_map"
    # map_files.append( filename+type )
    # fraction, map=write_map( zga, nlm, ave_c/ntop, np_on_grid, rmax_over_fraction, filename, type=type )
    # levels.append( flex.max(map)/3.0 )
    # if( len(clusters.nodes) == 1): return top_cc, top_ids, map_files, levels, [1]*len(map_files)
    cluster_id = 1
    with open(stdfile, "a") as log:
        print >> log, "cc. between Cluster average and PDB model"
    print "cc. between Cluster average and PDB model"
    # NOTE(review): clusters defaults to None but is dereferenced here -
    # callers apparently always pass it; confirm.
    for node in clusters.nodes:
        # Reset the accumulator, then average the members of this node.
        ave_c = ave_c * 0
        coefs_list = []
        for ii in node.leaf_eles:
            ave_c = ave_c + aligned_coefs[ii]
            cluster_ids[ii] = cluster_id
            coefs_list.append(aligned_coefs[ii])
        ave_c = ave_c / len(node.leaf_eles)
        level_n = model_consistency.good2n(nmax, coefs_list, ave_c, threshold=0.90, outfile=stdfile)
        with open(stdfile, "a") as log:
            print >> log, "consistency level to order n: %d" % level_n
        print "consistency level to order n: %d" % level_n
        mov_nlm_array.load_coefs(nlm, ave_c)
        align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
        cc = align_obj.get_cc()
        ave_cc.append(cc)
        with open(stdfile, "a") as log:
            print >> log, "cluster # ", cluster_id, "cc=", cc
        print "cluster # ", cluster_id, "cc=", cc
        if prefix == None:
            filename = "ave_" + str(cluster_id)
            with open(outfilelog, "a") as f:
                print >> f, filename
        else:
            filename = prefix + "ave_" + str(cluster_id)
            with open(outfilelog, "a") as f:
                print >> f, filename + ".ccp4"
        ave_maps.append(filename + type)
        fraction, map = write_map(zga, nlm, ave_c, np_on_grid, rmax_over_fraction, filename, type=type)
        ave_levels.append(flex.max(map) / 3.0)
        with open(stdfile, "a") as log:
            print >> log, "Averaged Model #%d Volume is %f (A^3)" % (
                cluster_id, fraction * sphere_volume)
        print "Averaged Model #%d Volume is %f (A^3)" % (cluster_id, fraction * sphere_volume)
        cluster_id = cluster_id + 1
    # with open(stdfile,"a") as log:
    #     log.write("__END__")
    return top_cc, top_ids, map_files, levels, cluster_ids, ave_maps, ave_levels, ave_cc
def pair_align(self, nlm_coefs, calc_cc=True):
    """Build the pairwise score matrix of the top models and cluster them.

    ``self.cc_array`` becomes a list of ``self.ntop`` ``flex.double`` rows,
    initialised to 1.  With ``nlm_coefs`` and ``calc_cc`` the entries are
    normalised alignment scores between the models listed in
    ``self.best_indices``.  Otherwise they are norms of the difference
    between the ``self.coefs`` entries of ``self.best_models``, each
    divided by its first element (so the diagonal becomes 0).  The matrix
    is written to ``<prefix>.cc``, clustered with
    ``hcluster.hcluster(..., 0.8)`` and the tree is written to
    ``<prefix>.tree``.

    Args:
        nlm_coefs: indexable collection of coefficient arrays, or ``None``.
        calc_cc: when false the coefficient-distance branch is used even if
            ``nlm_coefs`` is given.
    """
    self.cc_array = []
    for ii in range(self.ntop):
        self.cc_array.append(flex.double(self.ntop, 1))

    if (nlm_coefs is not None and calc_cc):
        comment = "# Correlation Coefficient <rho_1(r)*rho_2(r)>"
        fix_nlm_array = math.nlm_array(self.nmax)
        mov_nlm_array = math.nlm_array(self.nmax)
        nlm = fix_nlm_array.nlm()
        nlm_total = fix_nlm_array.nlm().size()
        top_coefs = []
        mean = flex.double()
        sig = flex.double()
        # First pass: cache the truncated coefficients and their statistics.
        for ii in self.best_indices:
            fix = nlm_coefs[ii][0:nlm_total]
            top_coefs.append(fix)
            fix_nlm_array.load_coefs(nlm, fix)
            m, s = get_mean_sigma(fix_nlm_array)
            mean.append(m)
            sig.append(s)
        # Second pass: fill the lower triangle and mirror it.
        for ii in range(self.ntop):
            fix = top_coefs[ii]
            fix_nlm_array.load_coefs(nlm, fix)
            for jj in range(ii):
                mov = top_coefs[jj]
                mov_nlm_array.load_coefs(nlm, mov)
                cc = fft_align.align(fix_nlm_array, mov_nlm_array,
                                     nmax=self.nmax, refine=True).best_score
                cc = (cc - mean[ii] * mean[jj]) / (sig[ii] * sig[jj])
                self.cc_array[ii][jj] = cc
                self.cc_array[jj][ii] = cc
    else:
        # There is no nlm coefs loaded
        comment = "# Coefficient distance, similar to the eq. (12) in L. Mak et al, JMGM.26 (2008) P.1035"
        all_nn_coefs = []
        for ii in range(self.ntop):
            nn_i = self.coefs[self.best_models[ii]].deep_copy()
            nn_i = nn_i / nn_i[0]
            all_nn_coefs.append(nn_i)
        for ii in range(self.ntop):
            # jj runs up to and including ii, so the diagonal is set too.
            for jj in range(ii + 1):
                cc = (all_nn_coefs[ii] - all_nn_coefs[jj]).norm()
                self.cc_array[ii][jj] = cc
                self.cc_array[jj][ii] = cc

    outfile = self.prefix + ".cc"
    # 'with' guarantees the file is closed even if formatting fails.
    with open(outfile, 'w') as out:
        print >> out, comment
        for ii in range(1, self.ntop + 1):
            print >> out, "%6d" % ii,
        print >> out, " average"
        for ii in range(self.ntop):
            for jj in range(self.ntop):
                print >> out, "%6.3f" % self.cc_array[ii][jj],
            print >> out, "%6.3f" % flex.mean(self.cc_array[ii])

    clusters = hcluster.hcluster(self.cc_array, 0.8)
    clusters.print_hclust()
    tree_dot_file = self.prefix + ".tree"
    clusters.print_dot(tree_dot_file)
    clusters.print_neato()
def run(args): #targetfile = $SASTBXPATH/modules/cctbx_project/sastbx targetfile = os.path.join(os.path.split(sys.path[0])[0],"superpose.txt") with open(targetfile,"w") as f: f.truncate() tempf = open(targetfile,'w') params = get_input(args, master_params, "align", banner, help,tempf) tempf.close() if params is None: return fix = params.align.fix typef = params.align.typef mov = params.align.mov typem = params.align.typem num_grid = params.align.num_grid nmax = params.align.nmax rmax = params.align.rmax topn = params.align.topn write_map = params.align.write_map fix_model=model_interface.build_model( fix, typef, nmax, rmax ) mov_model=model_interface.build_model( mov, typem, nmax, rmax ) fix_nlm_array = fix_model.nlm_array mov_nlm_array = mov_model.nlm_array with open(targetfile,"a") as f: f.write("doing alignment\n") print "doing alignment" align_obj = fft_align.align( fix_nlm_array, mov_nlm_array, nmax=nmax, topn=topn ,refine=True) cc = align_obj.get_cc() print cc mov_model.nlm_array = align_obj.moving_nlm rmax = update_rmax( rmax, fix_model, mov_model) fix_model.rmax = rmax mov_model.rmax = rmax shift=(rmax, rmax, rmax) with open(targetfile,"a") as f: f.write( "############# SUMMARY of ALIGNMENT #############\n") f.write( "Correlation Coefficient Between two models is: "+str(cc)+"\n") f.write("Rmax is : "+str(rmax)+"\n") f.write("Center of Mass is shifted to : "+str(list(shift))+"\n") f.write("OUTPUT files are : "+"\n") print "############# SUMMARY of ALIGNMENT #############" print "Correlation Coefficient Between two models is: ", cc print "Rmax is : ", rmax print "Center of Mass is shifted to : ", list(shift) print "OUTPUT files are : " current_is_mov = False pdblist = [] xplorlist = [] targetpath_fromGUI = '' targetpath_fromGUI_file = os.path.join(base_path,"targetpath_GUI.txt") if os.path.isfile(targetpath_fromGUI_file) and (os.stat(targetpath_fromGUI_file).st_size>0): with open(targetpath_fromGUI_file,"r") as f: targetpath_fromGUI = f.read().strip() for 
model in (fix_model, mov_model): if targetpath_fromGUI == '': base=model.id else: base = str(model.id.split("/")[-1]) print "base: ",base targetdir = os.path.join(targetpath_fromGUI,"Model_Superposition") base = os.path.join(targetdir,base) ##################20170520########################### #################change the output dir ################### # base = str(model.id.split("/")[-1]) # dirlist = sys.argv[0].split("sastbx") # tmpdir = str(dirlist[0])+"sastbx/gui/sasqt/tmp.txt" # with open(tmpdir,"r") as f: # targetdir = str(f.read().strip()) # base = os.path.join(targetdir,"superpose",base) ############################################################### # easy_pickle.dump(base+"_za.nlm", model.nlm_array.coefs() ) # with open(targetfile,"a") as f: # f.write(" "+base+"_za.nlm\n") # if(write_map): # model.write_map(filename=base+"_za.xplor") # xplorlist.append(base+"_za.xplor") # with open(targetfile,"a") as f: # f.write(" "+base+"_za.xplor\n") # if( model.vox_obj is not None): ### Write aligned PDB file #### # out_pdb_name=base+"_za.pdb" # pdblist.append(out_pdb_name) # if(current_is_mov): # ea = align_obj.best_ea # aligned_xyz = model.vox_obj.rotate((-ea[0],ea[1],-ea[2]), False) # else: # aligned_xyz = model.vox_obj.xyz() # aligned_xyz = aligned_xyz + shift ### Add the shift, such that the EDM center is the same as PDB ###################20170511##################################### ################debug for size error############################ #model.pdb_inp.hierarchy.atoms().set_xyz(aligned_xyz) # sel_cache = model.pdb_inp.hierarchy.atom_selection_cache() # hetero = model.pdb_inp.hierarchy.atoms().extract_hetero() # position = list(hetero) # no_hetero = sel_cache.selection("all") # for i in position: # no_hetero[i]=False # no_hetero_atoms = model.pdb_inp.hierarchy.atoms().select(no_hetero) # no_hetero_atoms.set_xyz(aligned_xyz) # model.pdb_inp.hierarchy.write_pdb_file( file_name=out_pdb_name, open_append=False) # with open(targetfile,"a") as f: # f.write(" 
"+out_pdb_name+'\n') # print out_pdb_name # current_is_mov = True # print "pdblist: ",pdblist # print "xplorlist: ", xplorlist ############targetpath_fromGUI=='' for commmand line ############else for GUI if targetpath_fromGUI != '': targetdir = os.path.join(targetpath_fromGUI,"Model_Superposition") build_pymol_script.write_pymol_superpose(pdblist,targetdir) with open(targetfile,"a") as f: f.write("############# END of SUMMARY #############\n") with open(targetfile,"a") as f: f.write("__END__") print "############# END of SUMMARY #############\n" print "__END__"
def build_map(nmax, shapes, coefs, codes, pdb_models): np_on_grid = 30 zga = math.zernike_grid(np_on_grid, nmax, False) ref_nlm_array = math.nlm_array(nmax) mov_nlm_array = math.nlm_array(nmax) nlm = ref_nlm_array.nlm() nlm_total = ref_nlm_array.coefs().size() top_cc = flex.double() ntop = shapes.ntop rank = 0 ave_c = flex.complex_double(nlm_total, 0) if (pdb_models is not None): ref_nlm_array.load_coefs(nlm, pdb_models[0].nlm_coef) fraction = 0 else: c = coefs[shapes.best_indices[0]][0:nlm_total] ave_c = c.deep_copy() ref_nlm_array.load_coefs(nlm, c) rank = 1 filename = "m" + str(rank) + "_" + shapes.best_codes[0] + ".xplor" fraction = write_xplor(zga, nlm, c, np_on_grid, shapes.best_rmax[0], filename) print rank, shapes.best_codes[0], fraction ref_mean, ref_s = get_mean_sigma(ref_nlm_array) mean_frac = fraction mean_sqr_frac = fraction * fraction for ii, code in zip(shapes.best_indices[rank:], shapes.best_codes[rank:]): coef = coefs[ii][0:nlm_total] mov_nlm_array.load_coefs(nlm, coef) mov_mean, mov_s = get_mean_sigma(mov_nlm_array) align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True) new_c = align_obj.moving_nlm.coefs() cc = align_obj.get_cc() top_cc.append(cc) ave_c = ave_c + new_c filename = "m" + str(rank + 1) + "_" + code + ".xplor" fraction = write_xplor(zga, nlm, new_c, np_on_grid, shapes.best_rmax[rank], filename) rank = rank + 1 print rank, code, fraction mean_frac = mean_frac + fraction mean_sqr_frac = mean_sqr_frac + fraction * fraction #sphere_volume = 4.0/3.0*smath.pi*rmax*rmax*rmax rmax = shapes.best_rmax[0] sphere_volume = rmax * rmax * rmax * 8.0 mean_frac = mean_frac / ntop sigma_frac = smath.sqrt(mean_sqr_frac / ntop - mean_frac * mean_frac) print "Volume is ", mean_frac * sphere_volume, "+/-", sigma_frac * sphere_volume #### Write average map #### filename = "ave_map.xplor" write_xplor(zga, nlm, ave_c / ntop, np_on_grid, rmax, filename) return top_cc
def run(fix, mov, out_pdb_name=None, nmax=20, topn=10):
    """Align the PDB model ``mov`` onto ``fix`` and return their correlation.

    Both inputs are built with ``model_interface.build_model(..., 'pdb',
    nmax, None)`` and aligned with ``fft_align.align(..., refine=True)``.

    Args:
        fix: Identifier of the fixed model, passed to ``build_model``.
        mov: Identifier of the moving model, passed to ``build_model``.
        out_pdb_name: When not ``None``, the moving model's coordinates are
            rotated by the best Euler angles and written to this PDB file.
        nmax: Expansion order used for both models and the alignment
            (previously hard-coded to 20).
        topn: ``topn`` argument of ``fft_align.align`` (previously
            hard-coded to 10).

    Returns:
        The correlation coefficient reported by ``align_obj.get_cc()``.
    """
    fix_model = model_interface.build_model(fix, 'pdb', nmax, None)
    mov_model = model_interface.build_model(mov, 'pdb', nmax, None)

    align_obj = fft_align.align(fix_model.nlm_array,
                                mov_model.nlm_array,
                                nmax=nmax,
                                topn=topn,
                                refine=True)
    cc = align_obj.get_cc()

    if out_pdb_name is not None:
        ea = align_obj.best_ea
        # NOTE: only the rotation is applied; the (rmax, rmax, rmax) shift
        # that would move the EDM centre onto the PDB centre is disabled.
        aligned_xyz = mov_model.vox_obj.rotate((-ea[0], ea[1], -ea[2]), False)
        mov_model.pdb_inp.hierarchy.atoms().set_xyz(aligned_xyz)
        mov_model.pdb_inp.hierarchy.write_pdb_file(file_name=out_pdb_name,
                                                   open_append=False)
    return cc
def run(args, outpath=None): params = get_input(args, master_params, "align", banner, help) if params is None: return fix = params.align.fix typef = params.align.typef mov = params.align.mov typem = params.align.typem num_grid = params.align.num_grid nmax = params.align.nmax rmax = params.align.rmax topn = params.align.topn write_map = params.align.write_map #outpath=params.align.outpath fix_model = model_interface.build_model(fix, typef, nmax, rmax) mov_model = model_interface.build_model(mov, typem, nmax, rmax) #fix_nl_array = fix_model.nl_array #mov_nl_array = mov_model.nl_array #CHI2 = flex.sum_sq( fix_nl_array.coefs() - mov_nl_array.coefs() ) #print "CHI2 between Fnl's is %e\n"%CHI2 fix_nlm_array = fix_model.nlm_array mov_nlm_array = mov_model.nlm_array print "doing alignment" align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax, topn=topn, refine=True) cc = align_obj.get_cc() mov_model.nlm_array = align_obj.moving_nlm rmax = update_rmax(rmax, fix_model, mov_model) fix_model.rmax = rmax mov_model.rmax = rmax shift = (rmax, rmax, rmax) print "############# SUMMARY of ALIGNMENT #############" print "Correlation Coefficient Between two models is: ", cc print "Rmax is : ", rmax print "Euler angles for the moving object is : ", list( align_obj.best_ea) print "Center of Mass is shifted to : ", list(shift) print "OUTPUT files are : " current_is_mov = False for model in (fix_model, mov_model): #base = model.id base = outpath + '/' + mov.split("/")[-1][:-4] ''' easy_pickle.dump(base+"_za.nlm", model.nlm_array.coefs() ) print " "+base+"_za.nlm" ''' if (current_is_mov): model.map = None if (write_map): model.write_map(filename=base + "_za.xplor") print " " + base + "_za.xplor" if (model.vox_obj is not None): ### Write aligned PDB file #### out_pdb_name = base + "_za.pdb" if (current_is_mov): ea = align_obj.best_ea aligned_xyz = model.vox_obj.rotate((-ea[0], ea[1], -ea[2]), False) else: aligned_xyz = model.vox_obj.xyz() aligned_xyz = aligned_xyz + shift ### 
Add the shift, such that the EDM center is the same as PDB model.pdb_inp.hierarchy.atoms().set_xyz(aligned_xyz) model.pdb_inp.hierarchy.write_pdb_file(file_name=out_pdb_name, open_append=False) print " " + out_pdb_name current_is_mov = True print "############# END of SUMMARY #############"
def find_relatives(ids, cc_min, cc_max, rmax, codes, moments, nmax=10): indices = flex.int() idlist = open('id_list.txt', 'r') for id in idlist: id = id[0:4] indices.append(flex.first_index(codes, id)) r_max = easy_pickle.load(prefix + 'pisa.rmax') nns = easy_pickle.load(prefix + 'pisa.nn') nn_array = math.nl_array(nmax) nn_indx = nn_array.nl() nn_total = nn_indx.size() q_array = flex.double(range(501)) / 2000.0 ref_nlm_array = math.nlm_array(nmax) target_nlm_array = math.nlm_array(nmax) nlm = ref_nlm_array.nlm() coef_size = nlm.size() all_indices = range(codes.size()) small_q_array = flex.double(range(51)) / 300.0 mean = [] sig = [] for indx in indices: print indx #rmax = 50.0 #r_max[indx] ref_coef = moments[indx] ref_nlm_array.load_coefs(nlm, ref_coef[0:coef_size]) z_model = zernike_model(ref_nlm_array, q_array, rmax, nmax) out_name = codes[indx] + "_.qi" nn_array.load_coefs(nn_indx, nns[indx][0:nn_total]) ref_int = put_intensity(z_model, q_array, nn_array, out_name) mean_r = ref_int * 0.0 sig_r = ref_int * 0.0 small_z_model = zernike_model(ref_nlm_array, small_q_array, rmax, nmax) small_ref_int = small_z_model.calc_intensity(nn_array) small_ref_int = small_ref_int / small_ref_int[0] N = 0.0 for coef, ii in zip(moments, all_indices): if N > 25: break target_nlm_array.load_coefs(nlm, coef[0:coef_size]) align_obj = fft_align.align(ref_nlm_array, target_nlm_array, nmax=nmax, topn=10, refine=False) cc = align_obj.get_cc() if (cc >= cc_min and cc <= cc_max): N += 1 nn_array.load_coefs(nn_indx, nns[ii][0:nn_total]) opt_r_obj = optimize_r(nn_array, small_ref_int, small_q_array, nmax) opt_r = gss(opt_r_obj.target, rmax * 0.8, rmax * 1.2) z_model = zernike_model(ref_nlm_array, q_array, opt_r, nmax) out_name = codes[indx] + "_" + codes[ii] + ".qi.rel" mod_int = put_intensity(z_model, q_array, nn_array, out_name, ref_int) out_name = codes[indx] + "_" + codes[ii] + ".qi" put_intensity(z_model, q_array, nn_array, out_name) mod_int = mod_int - 1.0 mean_r += mod_int sig_r += 
mod_int * mod_int print ii, cc, codes[ii], opt_r if N > 3: mean_r /= N sig_r = sig_r / N - mean_r * mean_r mean.append(mean_r) sig.append(sig_r) N = len(mean) if N > 0: mean_r = mean[0] * 0.0 s_r = mean[0] * 0.0 for uu in range(N): mean_r += mean[uu] s_r += sig[uu] mean_r /= N s_r /= N s_r = flex.sqrt(s_r) f = open('q_m_s_%s.dat' % rmax, 'w') for q, m, s in zip(q_array, mean_r, s_r): print >> f, q, m, s
def run(args): # filename = "res" + str(filenum) + ".txt" targetfile = os.path.join(os.path.split(sys.path[0])[0], "c5") with open(targetfile, "w") as f: f.truncate() tempf = open(targetfile, 'w') print args params = get_input(args, master_params, "aligndb", banner, help, tempf) tempf.close() if params is None: return fix = params.align.fix typef = params.align.typef mov = params.align.mov typem = params.align.typem num_grid = params.align.num_grid nmax = params.align.nmax rmax = params.align.rmax topn = params.align.topn write_map = params.align.write_map nlNum = params.align.nlnum nlmNum = params.align.nlmnum #fix_model=model_interface.build_model( fix, typef, nmax, rmax ) mov_model = model_interface.build_model(mov, typem, nmax, rmax) # prefix="/home/dongxq/align_code/dude-actives" prefix = "/home/dongxq/zalign/build/myDB" codes = easy_pickle.load(prefix + ".codes") nlm_coefs = easy_pickle.load(prefix + ".nlm") nl_coefs = easy_pickle.load(prefix + ".nl") rmaxs = easy_pickle.load(prefix + ".rmax") #compute distance nlRes = [] mov_nl_array = mov_model.nl_array mov_nl_coefs = mov_model.nl_array.coefs() tnl1 = time.time() for indx in range(len(nl_coefs)): #compute Chi-sequare distance mf_coef = numpy.true_divide(nl_coefs[indx], mov_nl_coefs) dist = numpy.sum(numpy.square(mov_nl_coefs - mf_coef * nl_coefs[indx])) #compute Mahalanobis distance # dist = mol2.Mahalanobis(mov_nl_coefs,nl_coefs[indx]) nlRes.append((indx, dist, codes[indx])) sortedNlRes = sorted(nlRes, key=operator.itemgetter(1), reverse=False) tnl2 = time.time() # compute nl_cc # nl_cc_res = [] # mov_nl_array = mov_model.nl_array # mov_nl_coefs = mov_model.nl_array.coefs() # tnl1 = time.time() # for indx in range(len(nl_coefs)): # nl_cc = pearson.pearson_cc(mov_nl_coefs, nl_coefs[indx]) # print nl_cc # nl_cc_res.append((indx, nl_cc, codes[indx])) # sortedNlRes = sorted(nl_cc_res, key=operator.itemgetter(1), reverse=True) # tnl2 = time.time() #compute nlm_cc mov_nlm_array = mov_model.nlm_array 
fix_nlm_array = math.nlm_array(nmax) nlm = fix_nlm_array.nlm() nlm_total = fix_nlm_array.nlm().size() nlmRes = [] tnlm1 = time.time() for i in range(nlNum): indx = sortedNlRes[i][0] fix = nlm_coefs[indx][0:nlm_total] fix_nlm_array.load_coefs(nlm, fix) align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax, refine=True) cc = align_obj.get_cc() nlmRes.append((indx, codes[indx], cc)) sortedNlmRes = sorted(nlmRes, key=operator.itemgetter(2), reverse=True) sortedNlmRes = sortedNlmRes[:nlmNum] tnlm2 = time.time() #merge chi to cc arr tmerge1 = time.time() for i in range(nlmNum): indx = sortedNlmRes[i][0] chi = list(filter(lambda j: j[0] == indx, sortedNlRes[0:]))[0][1] sortedNlmRes[i] += (chi, ) tmerge2 = time.time() print "merge time used: ", tmerge2 - tmerge1 #output with open(targetfile, "w") as f: f.write("############# SUMMARY of ALIGNMENT #############\n") f.write( "rank indx name cc chi-square\n") rank = 0 for arr in sortedNlmRes: rank += 1 arr = (rank, ) + arr f.write(str(arr) + "\n") t3 = time.time() f.write("rotation invariant computing time used: " + str(tnl2 - tnl1) + "\n") f.write("alignment computing time used: " + str(tnlm2 - tnlm1) + "\n") f.write("total time used: : " + str(t3 - t1))