def good2n(nmax, coefs_list, ref_coefs, threshold=0.90, outfile=''):  ## cc=0.90 is equivalent to 5% mutation in real space at nmax<=10
  max_indx = math.nlm_array(nmax).nlm().size()
  for nn in range(nmax, 1, -1):
    min_indx = math.nlm_array(nn - 1).nlm().size()
    #coef_0 = ref_coefs[min_indx:max_indx]
    coef_0 = ref_coefs[0:max_indx]
    mean_0 = abs(ref_coefs[0])
    sigma_0 = flex.sum(flex.norm(coef_0)) - mean_0**2
    sigma_0 = smath.sqrt(sigma_0)
    cc_array = flex.double()
    #out = open(outfile,"w")
    for coef in coefs_list:
      #coef_1 = coef[min_indx:max_indx]
      coef_1 = coef[0:max_indx]
      mean_1 = abs(coef[0])
      sigma_1 = flex.sum(flex.norm(coef_1)) - mean_1**2
      sigma_1 = smath.sqrt(sigma_1)
      cov_01 = abs(flex.sum(coef_0 * flex.conj(coef_1)))
      cov_01 = cov_01 - mean_0 * mean_1
      this_cc = cov_01 / sigma_1 / sigma_0
      cc_array.append(this_cc)
      out = open(outfile, "a")
      print >> out, this_cc
      out.close()
      print this_cc
    mean_cc = flex.mean(cc_array)
    out = open(outfile, "a")
    print >> out, "level n: ", nn, mean_cc
    out.close()
    print "level n: ", nn, mean_cc
    if (mean_cc >= threshold):
      return nn
    max_indx = min_indx
  return nn
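# --- Illustrative sketch, not part of the original module ---
# The quantity computed inside good2n is a normalized correlation between two
# truncated Zernike coefficient vectors, with the (0,0,0) term treated as the
# mean. The same formula written out with numpy, for clarity only; the
# flex-based loop above is the actual implementation.
import numpy as np

def zernike_cc(a, b):
  # a, b: complex coefficient vectors truncated to the same expansion order
  a, b = np.asarray(a), np.asarray(b)
  mean_a, mean_b = abs(a[0]), abs(b[0])
  sigma_a = np.sqrt(np.sum(np.abs(a)**2) - mean_a**2)
  sigma_b = np.sqrt(np.sum(np.abs(b)**2) - mean_b**2)
  cov_ab = abs(np.sum(a * np.conj(b))) - mean_a * mean_b
  return cov_ab / (sigma_a * sigma_b)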
def pair_align(self, nlm_coefs, calc_cc=True):
  self.cc_array = []
  for ii in range(self.ntop):
    self.cc_array.append(flex.double(self.ntop, 1))

  if (nlm_coefs is not None and calc_cc):
    comment = "# Correlation Coefficient <rho_1(r)*rho_2(r)>"
    fix_nlm_array = math.nlm_array(self.nmax)
    mov_nlm_array = math.nlm_array(self.nmax)
    nlm = fix_nlm_array.nlm()
    nlm_total = fix_nlm_array.coefs().size()
    top_coefs = []
    mean = flex.double()
    sig = flex.double()
    for ii in range(self.ntop):
      ff = self.best_models[ii]
      fix = nlm_coefs[ff][0:nlm_total]
      top_coefs.append(fix)
      fix_nlm_array.load_coefs(nlm, fix)
      m, s = get_mean_sigma(fix_nlm_array)
      mean.append(m)
      sig.append(s)

    for ii in range(self.ntop):
      fix = top_coefs[ii]
      fix_nlm_array.load_coefs(nlm, fix)
      for jj in range(ii):
        mov = top_coefs[jj]
        mov_nlm_array.load_coefs(nlm, mov)
        cc = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=self.nmax, refine=True).best_score
        cc = (cc - mean[ii] * mean[jj]) / (sig[ii] * sig[jj])
        self.cc_array[ii][jj] = cc
        self.cc_array[jj][ii] = cc

  outfile = self.prefix + ".cc"
  out = open(outfile, 'w')
  print >> out, comment
  for ii in range(1, self.ntop + 1):
    print >> out, "%6d" % ii,
  print >> out, " average"

  for ii in range(self.ntop):
    for jj in range(self.ntop):
      print >> out, "%6.3f" % self.cc_array[ii][jj],
    print >> out, "%6.3f" % flex.mean(self.cc_array[ii])

  clusters = hcluster.hcluster(self.cc_array, 0.8)
  clusters.print_hclust()
  out.close()
  tree_dot_file = self.prefix + ".tree"
  clusters.print_neato(tree_dot_file)
  # generate image using neato
  # run_command('/sw/bin/neato -Tpng -o cluster.png '+tree_dot_file )
  #clusters.print_neato()
  self.clusters = clusters
  return
def tst_rotation_fft(args):
  filename = args[0]
  filename2 = args[1]
  beta = float(args[2])
  nmax = 20
  nlm_array = math.nlm_array(nmax)
  coefs = easy_pickle.load(filename)
  nlm_array.load_coefs(nlm_array.nlm(), coefs)

  this_nlm_array = math.nlm_array(nmax)
  coefs = easy_pickle.load(filename2)
  this_nlm_array.load_coefs(nlm_array.nlm(), coefs)

  beta = smath.pi * beta
  cc_obj = correlation(nlm_array, this_nlm_array, nmax, beta)
  mm = cc_obj.mm_coef(0)
  fft_input = flex.complex_double(flex.grid(2 * nmax + 1, 2 * nmax + 1))
  fft_r = tst_rotation(args)
  for ii in range(mm.size()):
    fft_input[ii] = mm[ii]
    # print ii, mm[ii], fft_r[ii]/1681.0
  fft = fftpack.complex_to_complex_2d(2 * nmax + 1, 2 * nmax + 1)
  result = fft.backward(fft_input)
  out = open("fft_" + str(beta) + ".dat", 'w')
  for ii in range(2 * nmax + 1):
    for jj in range(2 * nmax + 1):
      print >> out, ii * 9, jj * 9, abs(result[(ii, jj)])
    print >> out
  out.close()
def __init__(self, data, nmax=20, rmax=None, scan=True, fraction=0.9,
             smear=True, prefix=None, weight='i', delta_q=None):
  self.data = data
  self.smear = smear
  self.delta_q = delta_q
  self.setup_weighting_scheme(weight)
  if (self.smear):
    self.set_up_smear()

  self.int_scale = flex.max(self.data.i)
  self.nmax = nmax
  if (rmax is None):
    rmax = int(get_rg(data) * 3.0 / 2.0)
  self.rmax = rmax
  self.scan = scan
  self.prefix = prefix
  self.fraction = fraction
  self.nlm_array = math.nlm_array(nmax)
  self.nlm_total = self.nlm_array.coefs().size()
  self.nn_array = math.nl_array(nmax)
  self.nn = self.nn_array.nl()
  self.nn_total = self.nn_array.coefs().size()
def __init__(self, data, rmax, qmax=0.15, nmax=20, np_on_grid=15,
             prefix='prefix', fraction=0.9):
  self.raw_data = data
  self.rmax = rmax / fraction
  self.fraction = fraction
  self.qmax = qmax
  self.nmax = nmax
  self.np_on_grid = np_on_grid
  self.ngp = (self.np_on_grid * 2 + 1)**3
  self.all_index = flex.int(range(self.ngp))
  self.n = self.np_on_grid * 2 + 1
  self.n2 = self.n**2
  self.prefix = prefix + '_'
  self.nlm_array = math.nlm_array(nmax)
  self.nlm = self.nlm_array.nlm()
  self.bandwidth = min(smath.pi / rmax / 2.0, 0.01)
  self.data = data
  self.scale_2_expt = self.data.i[0]
  self.initialize_reusable_objects()
def pair_align(self):
  ms = flex.double()
  ss = flex.double()
  tmp_nlm_array = math.nlm_array(self.nmax)
  for coef in self.finals:
    mean = abs(coef[0])
    var = flex.sum(flex.norm(coef))
    sigma = smath.sqrt(var - mean * mean)
    ms.append(mean)
    ss.append(sigma)

  grids = flex.grid(self.n_trial, self.n_trial)
  self.cc_array = flex.double(grids, 1.0)
  for ii in range(self.n_trial):
    self.nlm_array.load_coefs(self.nlm, self.finals[ii])
    for jj in range(ii):
      tmp_nlm_array.load_coefs(self.nlm, self.finals[jj])
      cc = fft_align.align(self.nlm_array, tmp_nlm_array, nmax=self.nmax, refine=True).best_score
      cc = (cc - ms[ii] * ms[jj]) / (ss[ii] * ss[jj])
      self.cc_array[(ii, jj)] = cc
      self.cc_array[(jj, ii)] = cc

  outfile = self.prefix + "pair.cc"
  comment = "# electron density correlation coefficient, < rho_1(r)*rho_2(r) >"
  out = open(outfile, 'w')
  print >> out, comment
  for ii in range(1, self.n_trial + 1):
    print >> out, "%6d" % ii,
  print >> out, " average"

  for ii in range(self.n_trial):
    for jj in range(self.n_trial):
      print >> out, "%6.3f" % self.cc_array[(ii, jj)],
    print >> out, flex.mean(self.cc_array[ii * self.n_trial:(ii + 1) * self.n_trial])
  out.close()
def __init__(self, data, xplor_file, rmax, qmax=0.15, nmax=20, np_on_grid=30,
             prefix='prefix', splat_range=1, n_trial=5, fraction=0.9,
             pdb_nlm=None, nbr_dist=2):
  self.raw_data = data
  self.rmax = rmax / fraction
  self.fraction = fraction
  self.qmax = qmax
  self.nmax = nmax
  self.nbr_dist = nbr_dist
  self.pdb_nlm = pdb_nlm
  self.np_on_grid = np_on_grid
  self.zga = math.zernike_grid(self.np_on_grid, self.nmax, False)
  self.n = self.np_on_grid * 2 + 1
  self.n2 = self.n**2
  self.n3 = self.n2 * self.n
  self.prefix = prefix + '_'
  self.splat_range = splat_range
  self.start_model = None
  neighbors = [(-1, 0, 0), (1, 0, 0), (0, -1, 0), (0, 1, 0), (0, 0, -1), (0, 0, 1)]
  self.neighbors = flex.int()
  for n in neighbors:
    self.neighbors.append(self.convert_indx_3_1(n))
  self.build_nbr_list(self.nbr_dist)
  print "neighbor distance: ", self.nbr_dist
  self.build_sphere_list()

  #### Labels for different regions ####
  self.solvent_label = 0
  self.molecule_label = 1
  self.surface_label = 2

  self.nlm_array = math.nlm_array(nmax)
  self.nlm = self.nlm_array.nlm()
  self.counter = 0
  self.n_accept = 0
  self.bandwidth = min(smath.pi / rmax / 2.0, 0.01)
  # self.data = reduce_raw_data(self.raw_data, self.qmax, self.bandwidth, level=0.00001)
  self.data = self.raw_data
  self.data.i = self.data.i / self.data.i[0]
  self.data.s = self.data.i
  self.scale_2_expt = self.data.i[0]

  self.build_starting_model()
  self.mark_mod_core_region0(splat_range)
  #self.mark_mod_core_region(splat_range)
  self.n_trial = n_trial
  self.finals = []
  for ii in range(n_trial):
    self.refine(ii)
    self.finals.append(self.best_nlm_coefs.deep_copy())
  self.pair_align()
def __init__(self, data, rmax, qmax=0.15, nmax=20, np_on_grid=15,
             prefix='prefix', fraction=0.9):
  self.raw_data = data
  self.rmax = rmax / fraction
  self.fraction = fraction
  self.qmax = qmax
  self.nmax = nmax
  #self.load_maps(xplor_file)
  #self.ngp = self.raw_map.size()  # number of grid point in 3D box
  self.np_on_grid = np_on_grid
  self.ngp = (self.np_on_grid * 2 + 1)**3
  self.all_index = flex.int(range(self.ngp))
  self.n = self.np_on_grid * 2 + 1
  self.n2 = self.n**2
  self.prefix = prefix + '_'
  self.nlm_array = math.nlm_array(nmax)
  self.nlm = self.nlm_array.nlm()
  self.bandwidth = min(smath.pi / rmax / 2.0, 0.01)
  #self.data = reduce_raw_data(self.raw_data, self.qmax, self.bandwidth, level=0.00001)
  self.data = data
  #self.data.i = self.data.i/self.data.i[0]
  #self.data.s = self.data.i
  self.scale_2_expt = self.data.i[0]
  self.initialize_reusable_objects()
def __init__(self, xyz, abs_I0, nmax=30, density=None, external_rmax=-1, np=50,
             splat_range=1, uniform=False, fix_dx=True, default_dx=0.5,
             fraction=0.7, protein=0.44, sol_layer=0.03, solvent=0.334, layer=4):
  self.nmax = nmax
  self.abs_I0 = abs_I0
  self.density = flex.double(xyz.size(), 1.0)
  self.xyz = xyz
  self.external_rmax = external_rmax
  self.default_dx = default_dx
  self.zernike_mom = math.nlm_array(nmax)
  self.protein = protein
  self.solvent = solvent * 1.06      ## empirical parameter
  self.sol_layer = sol_layer / 2.0   ## empirical parameter
  self.layer_thick = layer
  self.calc_mom(np, splat_range, uniform, fix_dx, default_dx, fraction)
def build_with_map(self, mapfile):
  this_xplor = xplor_map.reader(mapfile)
  self.np_on_grid = (this_xplor.gridding.n[0] - 1) / 2  ## this is the np_on_grid stored in xplor map
  self.np_on_grid = int(self.np_on_grid)
  self.raw_map = this_xplor.data
  self.map = flex.double(this_xplor.data.size(), 0)
  self.id = mapfile.split('.')[0]
  this_rmax = this_xplor.unit_cell.parameters()[0] / 2.0
  if (self.rmax is not None and (self.rmax != this_rmax)):
    # do the scaling
    grid = build_3d_grid(self.np_on_grid, this_rmax)
    self.nlm_coefs = get_moments_for_scaled_model(self.raw_map, self.np_on_grid, grid,
                                                  self.nmax, this_rmax, self.rmax)
    self.nlm_array = math.nlm_array(self.nmax)
    self.nlm_array.load_coefs(self.nlm_array.nlm(), self.nlm_coefs)
  else:
    self.rmax = this_rmax
    print self.rmax, "test"
    threshold = flex.max(self.raw_map) / 3.0
    select = flex.bool(this_xplor.data.as_1d() >= threshold)
    self.map.set_selected(select, 1)
    self.ngp = self.map.size()  # number of grid point in 3D box
    self.all_indx = flex.int(range(self.ngp))
    self.molecule = self.all_indx.select(select)
    self.grid_obj = math.sphere_grid(self.np_on_grid, self.nmax)
    ss = self.grid_obj.construct_space_sum_via_list(self.molecule.as_1d(), self.map.as_1d())
    self.moment_obj = math.zernike_moments(self.grid_obj, self.nmax)
    self.moment_obj.calc_moments(ss.as_1d())
    self.nlm_array = self.moment_obj.moments()
    self.nlm_coefs = self.nlm_array.coefs()
def computeCc(process_n):
  #compute nlm_cc
  print "\nRun task cc pid%s" % (os.getpid()), time.ctime()
  mov_nlm_array = mov_model.nlm_array
  fix_nlm_array = math.nlm_array(nmax)
  nlm = fix_nlm_array.nlm()
  nlm_total = fix_nlm_array.nlm().size()
  nlmRes = []
  start_line = process_n * perSizeCc
  if (process_n + 1) * perSizeCc > nlNum:
    end_line = nlNum
  else:
    end_line = (process_n + 1) * perSizeCc
  print "cc_end_line:", end_line
  for i in range(start_line, end_line):
    indx = sortedNlRes[i][0]
    fix = nlm_coefs[indx][0:nlm_total]
    fix_nlm_array.load_coefs(nlm, fix)
    align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
    cc = align_obj.get_cc()
    print indx, codes[indx], cc
    nlmRes.append((indx, codes[indx], cc))
  # print "thread %d finished at"%(process_n), time.ctime()
  print "len------", len(nlmRes)
  return nlmRes
def __init__(self, data, rg, io, nmax=20, lmax=10, rmax=None, scan=True,
             fraction=0.9, smear=True, prefix=None, weight='i', delta_q=None,
             scale_power=2.0):
  self.data = data
  blq0 = self.data.blq[0]
  self.data.blq = self.data.blq / blq0
  self.setup_weighting_scheme(weight)
  self.smear = smear
  self.rg = rg
  self.io = io
  self.int_scale = self.io
  self.nmax = nmax
  self.lmax = lmax
  if (rmax is None):
    rmax = int(self.rg * 3.0 / 2.0)
  self.rmax = rmax
  self.scan = scan
  self.prefix = prefix
  self.fraction = fraction
  self.nlm_array = math.nlm_array(nmax)
  self.nlm = self.nlm_array.nlm()
  self.nlm_total = self.nlm_array.coefs().size()
  self.scale_power = scale_power
def get_profile(nn_array, rmax, fraction=0.9, nmax=10, qmax=0.25, qstep=0.002):
  nlm_array = math.nlm_array(nmax)
  n = int(qmax / qstep + 0.5)
  q_array = flex.double(range(n + 1)) / float(n) * qmax
  z_model = zernike_model(nlm_array, q_array, rmax / fraction, nmax)
  result = z_model.calc_intensity(nn_array)
  return q_array, result
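# --- Illustrative usage sketch, not part of the original module ---
# How get_profile is typically called: load pickled Fnn (nl) coefficients,
# place them in a math.nl_array, and evaluate the scattering profile, following
# the same load_coefs pattern used in find_relatives below. The pickle name
# 'my.nn.pickle' and rmax=50.0 are placeholders, not values from the source.
def example_get_profile(nn_pickle='my.nn.pickle', rmax=50.0, nmax=10):
  from libtbx import easy_pickle
  nn_coefs = easy_pickle.load(nn_pickle)
  nn_array = math.nl_array(nmax)
  nn_array.load_coefs(nn_array.nl(), nn_coefs[0:nn_array.coefs().size()])
  q, intensity = get_profile(nn_array, rmax, nmax=nmax)
  for qq, ii in zip(q, intensity):
    print qq, ii
  return q, intensity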
def tst_rotation(args):
  filename = args[0]
  filename2 = args[1]
  beta = float(args[2])
  ngrid = 40
  nmax = 20
  nlm_array = math.nlm_array(nmax)
  coefs = easy_pickle.load(filename)
  nlm_array.load_coefs(nlm_array.nlm(), coefs)

  this_nlm_array = math.nlm_array(nmax)
  coefs = easy_pickle.load(filename2)
  this_nlm_array.load_coefs(nlm_array.nlm(), coefs)

  beta = smath.pi * beta
  cc_obj = correlation(nlm_array, this_nlm_array, nmax, beta)
  fft_input = flex.complex_double(flex.grid(2 * nmax + 1, 2 * nmax + 1))
  count = 0
  radian = 180.0 / smath.pi
  out = open("scan_" + str(beta) + ".dat", 'w')
  for ii in range(ngrid + 1):
    for jj in range(ngrid + 1):
      alpha = ii * smath.pi * 2.0 / ngrid
      gama = jj * smath.pi * 2.0 / ngrid
      cc = cc_obj.calc_correlation(alpha, beta, gama)
      fft_input[count] = cc
      count = count + 1
      print >> out, alpha * radian, gama * radian, abs(cc)
    print >> out
  out.close()

  fft = fftpack.complex_to_complex_2d(2 * nmax + 1, 2 * nmax + 1)
  result = fft.forward(fft_input)
  #return result
  result = fft.backward(result)
  out = open("fft_fake_" + str(beta) + ".dat", 'w')
  for ii in range(2 * nmax + 1):
    for jj in range(2 * nmax + 1):
      print >> out, ii * 9, jj * 9, abs(result[(jj, ii)])
    print >> out
  out.close()
def tst():
  nmax = 20
  max_indx = math.nlm_array(nmax).nlm().size()
  a = flex.complex_double(flex.random_double(max_indx), flex.random_double(max_indx))
  b = flex.complex_double(flex.random_double(max_indx), flex.random_double(max_indx))
  c_list = [a]
  good_n = good2n(nmax, c_list, b, threshold=0.8)
  print good_n
def build_with_nlm(self, nlm_coefs):
  self.nlm_array = math.nlm_array(self.nmax)
  self.nlm_total = self.nlm_array.coefs().size()
  if (nlm_coefs.size() < self.nlm_total):
    print "The nmax is bigger than the nmax used to build the database"
  else:
    self.nlm_coefs = nlm_coefs[0:self.nlm_total]
    self.nlm_array.load_coefs(self.nlm_array.nlm(), self.nlm_coefs)
    if (self.rmax is None):
      self.rmax = 50
      print "WARNING: rmax was not specified, and default value ( 50.0 ) was used"
def calcc(modelId):
  nlm_array_mov = math.nlm_array(nmax)
  nlm = nlm_array_ref.nlm()
  nlm_array_mov.load_coefs(nlm, coefs[modelId][0:nlm_total])
  align_obj = fft_align.align(nlm_array_ref, nlm_array_mov, nmax=nmax, refine=True)
  cc = align_obj.get_cc()
  print "c.c. between ", os.path.split(pdbfile)[-1], "and ", codes[modelId], "is ", cc
  return cc
def tst_nlm():
  nlm_array = math.nlm_array(10)
  nlm_array.set_coef(10, 2, 2, 3.0)
  a = nlm_array.get_coef(10, 2, 2)
  assert (abs(a - 3.0)) <= 1e-5

  nlm_ind = shared.tiny_int_3([(10, 2, 2), (8, 2, 2)])
  nlm_coef = flex.complex_double([15.0 + 0j, 15.0 + 0j])
  assert nlm_array.load_coefs(nlm_ind, nlm_coef)
  a = nlm_array.get_coef(10, 2, 2)
  b = nlm_array.get_coef(8, 2, 2)
  assert (abs(a - 15.0)) <= 1e-5
  assert (abs(b - 15.0)) <= 1e-5

  nlm = nlm_array.nlm()
  cnlm = nlm_array.coefs()
  sel = nlm_array.select_on_nl(2, 2)
  assert len(sel) == 5
  assert 5 in sel
  assert 6 in sel
  assert 7 in sel
  assert 8 in sel
  assert 9 in sel
def __init__(self, data, rg, io, nmax=20, rmax=None, scan=True, fraction=0.9,
             smear=True, prefix=None, weight='i', delta_q=None, scale_power=2.0):
  global stdfile
  global outfilelog
  self.stdfile = stdfile
  self.outfilelog = outfilelog
  self.data = data
  self.smear = smear
  self.delta_q = delta_q
  self.setup_weighting_scheme(weight)
  if (self.smear):
    self.set_up_smear()
  self.rg = rg
  self.io = io
  self.int_scale = self.io
  self.nmax = nmax
  if (rmax is None):
    rmax = int(self.rg * 3.0 / 2.0)
  self.rmax = rmax
  self.scan = scan
  self.prefix = prefix
  self.fraction = fraction
  self.nlm_array = math.nlm_array(nmax)
  self.nlm_total = self.nlm_array.coefs().size()
  self.nn_array = math.nl_array(nmax)
  self.nn = self.nn_array.nl()
  self.nn_total = self.nn_array.coefs().size()
  self.scale_power = scale_power
def computeCc(thread_n, arr_cc):
  #compute nlm_cc
  print "----------thread_cc----------", thread_n
  mov_nlm_array = mov_model.nlm_array
  fix_nlm_array = math.nlm_array(nmax)
  nlm = fix_nlm_array.nlm()
  nlm_total = fix_nlm_array.nlm().size()
  nlmRes = []
  # start_line was undefined in the original snippet; assumed to mirror the
  # multiprocessing variant of computeCc above.
  start_line = thread_n * perSizeCc
  if ((thread_n + 1) * perSizeCc < nlNum):
    # range(x,y) => [x,y-1]
    end_line = (thread_n + 1) * perSizeCc
  else:
    end_line = nlNum
  for i in range(start_line, end_line):
    indx = sortedNlRes[i][0]
    fix = nlm_coefs[indx][0:nlm_total]
    fix_nlm_array.load_coefs(nlm, fix)
    align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
    cc = align_obj.get_cc()
    nlmRes.append((indx, codes[indx], cc))
  arr_cc.extend(nlmRes)
def build_map(nmax, shapes, coefs, codes, pdb_models):
  np_on_grid = 30
  zga = math.zernike_grid(np_on_grid, nmax, False)

  ref_nlm_array = math.nlm_array(nmax)
  mov_nlm_array = math.nlm_array(nmax)
  nlm = ref_nlm_array.nlm()
  nlm_total = ref_nlm_array.coefs().size()

  top_cc = flex.double()
  ntop = shapes.ntop
  rank = 0
  ave_c = flex.complex_double(nlm_total, 0)
  if (pdb_models is not None):
    ref_nlm_array.load_coefs(nlm, pdb_models[0].nlm_coef)
    fraction = 0
  else:
    c = coefs[shapes.best_indices[0]][0:nlm_total]
    ave_c = c.deep_copy()
    ref_nlm_array.load_coefs(nlm, c)
    rank = 1
    filename = "m" + str(rank) + "_" + shapes.best_codes[0] + ".xplor"
    fraction = write_xplor(zga, nlm, c, np_on_grid, shapes.best_rmax[0], filename)
    print rank, shapes.best_codes[0], fraction

  ref_mean, ref_s = get_mean_sigma(ref_nlm_array)
  mean_frac = fraction
  mean_sqr_frac = fraction * fraction
  for ii, code in zip(shapes.best_indices[rank:], shapes.best_codes[rank:]):
    coef = coefs[ii][0:nlm_total]
    mov_nlm_array.load_coefs(nlm, coef)
    mov_mean, mov_s = get_mean_sigma(mov_nlm_array)
    align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
    new_c = align_obj.moving_nlm.coefs()
    cc = align_obj.get_cc()
    top_cc.append(cc)
    ave_c = ave_c + new_c
    filename = "m" + str(rank + 1) + "_" + code + ".xplor"
    fraction = write_xplor(zga, nlm, new_c, np_on_grid, shapes.best_rmax[rank], filename)
    rank = rank + 1
    print rank, code, fraction
    mean_frac = mean_frac + fraction
    mean_sqr_frac = mean_sqr_frac + fraction * fraction

  #sphere_volume = 4.0/3.0*smath.pi*rmax*rmax*rmax
  rmax = shapes.best_rmax[0]
  sphere_volume = rmax * rmax * rmax * 8.0
  mean_frac = mean_frac / ntop
  sigma_frac = smath.sqrt(mean_sqr_frac / ntop - mean_frac * mean_frac)
  print "Volume is ", mean_frac * sphere_volume, "+/-", sigma_frac * sphere_volume

  #### Write average map ####
  filename = "ave_map.xplor"
  write_xplor(zga, nlm, ave_c / ntop, np_on_grid, rmax, filename)
  return top_cc
def build_map(nmax, rmax, coefs, codes, model_indx, pdb_models, prefix,
              clusters=None, fract=0.9, type='.ccp4'):
  global stdfile
  global outfilelog
  rmax_over_fraction = rmax / fract
  np_on_grid = 30
  zga = math.zernike_grid(np_on_grid, nmax, False)

  ref_nlm_array = math.nlm_array(nmax)
  mov_nlm_array = math.nlm_array(nmax)
  nlm = ref_nlm_array.nlm()
  nlm_total = ref_nlm_array.coefs().size()

  top_cc = flex.double()
  top_ids = []
  ntop = model_indx.size()
  rank = 0
  ave_c = flex.complex_double(nlm_total, 0)
  aligned_coefs = []
  map_files = []
  levels = []
  scale_model = False
  if ((pdb_models is not None) and (pdb_models[0].rmax > rmax)):
    scale_model = True
    external_rmax = pdb_models[0].rmax
    grid = build_3d_grid(np_on_grid, rmax_over_fraction)

  with open(stdfile, "a") as log:
    print >> log, "Rank PDB_code cc (to the given model or the first model):"
  print "Rank PDB_code cc (to the given model or the first model):"

  if (pdb_models is not None):
    ref_nlm_array.load_coefs(nlm, pdb_models[0].nlm_coef)
    fraction = 0
  else:
    c = coefs[model_indx[0]][0:nlm_total]
    ave_c = c.deep_copy()
    ref_nlm_array.load_coefs(nlm, c)
    rank = 1
    if prefix != None:
      filename = prefix + "m" + str(rank) + "_" + codes[model_indx[0]]
      #filename = os.path.join(prefix,"m"+str(rank)+"_"+codes[ model_indx[0] ])
    else:
      filename = "m" + str(rank) + "_" + codes[model_indx[0]]
    map_files.append(filename + type)
    fraction, map = write_map(zga, nlm, c, np_on_grid, rmax_over_fraction, filename, type=type)
    #level = flex.max(map)/3.0
    print "map in search pdb.py: \n"
    print map
    level = map.standard_deviation_of_the_sample()
    print "level in search pdb: ", level
    levels.append(level)
    top_cc.append(1.0)  # refering to itself
    if (scale_model):
      c = get_moments_for_scaled_model(map, np_on_grid, grid, nmax, rmax, external_rmax)
    with open(stdfile, "a") as log:
      print >> log, rank, codes[model_indx[0]]
    print rank, codes[model_indx[0]]
    aligned_coefs.append(c.deep_copy())  # save the aligned nlm coefs

  mean_frac = fraction
  mean_sqr_frac = fraction * fraction
  for ii in model_indx[rank:]:
    rank = rank + 1
    c = coefs[ii][0:nlm_total]
    mov_nlm_array.load_coefs(nlm, c)
    align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
    new_c = align_obj.moving_nlm.coefs()
    if prefix != None:
      #filename = os.path.join(prefix,"m"+str(rank)+"_"+codes[ii])
      filename = prefix + "m" + str(rank) + "_" + codes[ii]
      print "**********************************"
      print "outfilelog: ", outfilelog
      with open(outfilelog, "a") as f:
        print >> f, filename + ".ccp4"
    else:
      filename = "m" + str(rank) + "_" + codes[ii]
      with open(outfilelog, "a") as f:
        print >> f, filename + ".ccp4"
    map_files.append(filename + type)
    fraction, map = write_map(zga, nlm, new_c, np_on_grid, rmax_over_fraction, filename, type=type)
    if (scale_model):
      c = get_moments_for_scaled_model(map, np_on_grid, grid, nmax, rmax, external_rmax)
      mov_nlm_array.load_coefs(nlm, c)
      align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
      new_c = align_obj.moving_nlm.coefs()
      fraction, map = write_map(zga, nlm, new_c, np_on_grid, rmax_over_fraction, filename, type=type)
    level = flex.max(map) / 3.0
    levels.append(level)
    cc = align_obj.get_cc()
    with open(stdfile, "a") as log:
      print >> log, "%2d %5s %5.3f" % (rank, codes[ii], cc)
    print "%2d %5s %5.3f" % (rank, codes[ii], cc)
    top_cc.append(cc)
    top_ids.append(codes[ii])
    ave_c = ave_c + new_c
    aligned_coefs.append(new_c.deep_copy())  # save the aligned nlm coefs
    mean_frac = mean_frac + fraction
    mean_sqr_frac = mean_sqr_frac + fraction * fraction

  sphere_volume = rmax_over_fraction**3.0 * 8.0  # cube with d=2.0*r
  mean_frac = mean_frac / ntop
  sigma_frac = smath.sqrt(mean_sqr_frac / ntop - mean_frac * mean_frac)
  with open(stdfile, "a") as log:
    print >> log, "Volume is ", mean_frac * sphere_volume, "+/-", sigma_frac * sphere_volume, "(A^3)"
  print "Volume is ", mean_frac * sphere_volume, "+/-", sigma_frac * sphere_volume, "(A^3)"

  #### Write average map ####
  ave_maps = []
  ave_levels = []
  ave_cc = []
  cluster_ids = [1] * len(model_indx)  # initialize cluster_ids
  # filename = "ave_map"
  # map_files.append( filename+type )
  # fraction, map=write_map( zga, nlm, ave_c/ntop, np_on_grid, rmax_over_fraction, filename, type=type )
  # levels.append( flex.max(map)/3.0 )
  # if( len(clusters.nodes) == 1): return top_cc, top_ids, map_files, levels, [1]*len(map_files)
  cluster_id = 1
  with open(stdfile, "a") as log:
    print >> log, "cc. between Cluster average and PDB model"
  print "cc. between Cluster average and PDB model"

  for node in clusters.nodes:
    ave_c = ave_c * 0
    coefs_list = []
    for ii in node.leaf_eles:
      ave_c = ave_c + aligned_coefs[ii]
      cluster_ids[ii] = cluster_id
      coefs_list.append(aligned_coefs[ii])
    ave_c = ave_c / len(node.leaf_eles)
    level_n = model_consistency.good2n(nmax, coefs_list, ave_c, threshold=0.90, outfile=stdfile)
    with open(stdfile, "a") as log:
      print >> log, "consistency level to order n: %d" % level_n
    print "consistency level to order n: %d" % level_n
    mov_nlm_array.load_coefs(nlm, ave_c)
    align_obj = fft_align.align(ref_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
    cc = align_obj.get_cc()
    ave_cc.append(cc)
    with open(stdfile, "a") as log:
      print >> log, "cluster # ", cluster_id, "cc=", cc
    print "cluster # ", cluster_id, "cc=", cc
    if prefix == None:
      filename = "ave_" + str(cluster_id)
      with open(outfilelog, "a") as f:
        print >> f, filename
    else:
      filename = prefix + "ave_" + str(cluster_id)
      with open(outfilelog, "a") as f:
        print >> f, filename + ".ccp4"
    ave_maps.append(filename + type)
    fraction, map = write_map(zga, nlm, ave_c, np_on_grid, rmax_over_fraction, filename, type=type)
    ave_levels.append(flex.max(map) / 3.0)
    with open(stdfile, "a") as log:
      print >> log, "Averaged Model #%d Volume is %f (A^3)" % (cluster_id, fraction * sphere_volume)
    print "Averaged Model #%d Volume is %f (A^3)" % (cluster_id, fraction * sphere_volume)
    cluster_id = cluster_id + 1

  # with open(stdfile,"a") as log:
  #   log.write("__END__")
  return top_cc, top_ids, map_files, levels, cluster_ids, ave_maps, ave_levels, ave_cc
def run(args):
  targetfile = os.path.join(os.path.split(sys.path[0])[0], "retrieval.txt")
  with open(targetfile, "w") as f:
    f.truncate()
  time1 = time.time()
  global nmax
  global nlm_array_ref
  global coefs
  global nlm_total
  global codes
  global pdbfile
  params = get_input(args, master_params, "retrieval", banner, help)
  if (params is None):
    exit()
  pdbfile = params.retrieval.pdbfile
  dbpath = params.retrieval.dbpath
  nmax = params.retrieval.nmax
  dbprefix = params.retrieval.db_prefix
  prefix = params.retrieval.prefix

  print "=============process the protein model=============="
  with open(targetfile, "a") as f:
    print >> f, "=============process the protein model=============="
  zernike_moments(pdbfile, nmax=nmax)
  queryCoefFile = pdbfile.split(".")[0] + ".nlm.pickle"
  #queryCoefFile = pdbfile.replace("pdb", "nlm.pickle")
  queryCoef = easy_pickle.load(queryCoefFile)

  with open(targetfile, "a") as f:
    print >> f, "=============load database==============="
  print "=============load database==============="
  if (dbpath is None):
    dbpath = set_default_db_path()
    codes = easy_pickle.load(os.path.join(dbpath, dbprefix + ".codes"))
    coefs = easy_pickle.load(os.path.join(dbpath, dbprefix + ".nlm"))
  else:
    codes = easy_pickle.load(os.path.join(dbpath, dbprefix + ".codes"))
    coefs = easy_pickle.load(os.path.join(dbpath, dbprefix + ".nlm"))
  with open(targetfile, "a") as f:
    print >> f, "=============database============="
    print >> f, os.path.join(dbpath, dbprefix + ".codes")
    print >> f, os.path.join(dbpath, dbprefix + ".nlm")
    print >> f, "=================================="
  print "=============database============="
  print os.path.join(dbpath, dbprefix + ".codes")
  print os.path.join(dbpath, dbprefix + ".nlm")
  print "=================================="

  nmodels = len(coefs)
  nlm_array_ref = math.nlm_array(nmax)
  nlm = nlm_array_ref.nlm()
  nlm_total = nlm_array_ref.coefs().size()
  nlm_array_ref.load_coefs(nlm, queryCoef[0:nlm_total])

  p = Pool(8)
  cclist = p.map(calcc, range(nmodels))

  distlist = [1 - cc for cc in cclist]
  rankedlist = sorted(range(nmodels), key=lambda k: distlist[k])
  rankedcodes = [codes[rank] for rank in rankedlist]
  sortedcclist = sorted(cclist, reverse=True)

  with open(targetfile, "a") as f:
    print >> f, "=========Top 10 models matching the input protein model============"
  print "=========Top 10 models matching the input protein model============"
  with open(targetfile, "a") as f:
    for i in range(10):
      print "top ", (i + 1), " ", rankedcodes[i], "c.c.", sortedcclist[i]
      print >> f, "top ", (i + 1), " ", rankedcodes[i], "c.c.", sortedcclist[i]

  time2 = time.time()
  print "time used:", time2 - time1
  with open(targetfile, "a") as f:
    print >> f, "time used: ", time2 - time1
def pair_align(self, nlm_coefs, calc_cc=True):
  self.cc_array = []
  for ii in range(self.ntop):
    self.cc_array.append(flex.double(self.ntop, 1))

  if (nlm_coefs is not None and calc_cc):
    comment = "# Correlation Coefficient <rho_1(r)*rho_2(r)>"
    fix_nlm_array = math.nlm_array(self.nmax)
    mov_nlm_array = math.nlm_array(self.nmax)
    nlm = fix_nlm_array.nlm()
    nlm_total = fix_nlm_array.nlm().size()
    top_coefs = []
    mean = flex.double()
    sig = flex.double()
    for ii in self.best_indices:
      fix = nlm_coefs[ii][0:nlm_total]
      top_coefs.append(fix)
      fix_nlm_array.load_coefs(nlm, fix)
      m, s = get_mean_sigma(fix_nlm_array)
      mean.append(m)
      sig.append(s)

    for ii in range(self.ntop):
      fix = top_coefs[ii]
      fix_nlm_array.load_coefs(nlm, fix)
      for jj in range(ii):
        mov = top_coefs[jj]
        mov_nlm_array.load_coefs(nlm, mov)
        cc = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=self.nmax, refine=True).best_score
        cc = (cc - mean[ii] * mean[jj]) / (sig[ii] * sig[jj])
        self.cc_array[ii][jj] = cc
        self.cc_array[jj][ii] = cc

  else:  # There is no nlm coefs loaded
    comment = "# Coefficient distance, similar to the eq. (12) in L. Mak et al, JMGM.26 (2008) P.1035"
    all_nn_coefs = []
    for ii in range(self.ntop):
      nn_i = self.coefs[self.best_models[ii]].deep_copy()
      nn_i = nn_i / nn_i[0]
      all_nn_coefs.append(nn_i)
    for ii in range(self.ntop):
      for jj in range(ii + 1):
        cc = (all_nn_coefs[ii] - all_nn_coefs[jj]).norm()
        self.cc_array[ii][jj] = cc
        self.cc_array[jj][ii] = cc

  outfile = self.prefix + ".cc"
  out = open(outfile, 'w')
  print >> out, comment
  for ii in range(1, self.ntop + 1):
    print >> out, "%6d" % ii,
  print >> out, " average"

  for ii in range(self.ntop):
    for jj in range(self.ntop):
      print >> out, "%6.3f" % self.cc_array[ii][jj],
    print >> out, "%6.3f" % flex.mean(self.cc_array[ii])

  clusters = hcluster.hcluster(self.cc_array, 0.8)
  clusters.print_hclust()
  out.close()
  tree_dot_file = self.prefix + ".tree"
  clusters.print_dot(tree_dot_file)
  clusters.print_neato()
def find_relatives(ids, cc_min, cc_max, rmax, codes, moments, nmax=10):
  indices = flex.int()
  idlist = open('id_list.txt', 'r')
  for id in idlist:
    id = id[0:4]
    indices.append(flex.first_index(codes, id))

  r_max = easy_pickle.load(prefix + 'pisa.rmax')
  nns = easy_pickle.load(prefix + 'pisa.nn')
  nn_array = math.nl_array(nmax)
  nn_indx = nn_array.nl()
  nn_total = nn_indx.size()

  q_array = flex.double(range(501)) / 2000.0
  ref_nlm_array = math.nlm_array(nmax)
  target_nlm_array = math.nlm_array(nmax)
  nlm = ref_nlm_array.nlm()
  coef_size = nlm.size()
  all_indices = range(codes.size())

  small_q_array = flex.double(range(51)) / 300.0
  mean = []
  sig = []
  for indx in indices:
    print indx
    #rmax = 50.0  #r_max[indx]
    ref_coef = moments[indx]
    ref_nlm_array.load_coefs(nlm, ref_coef[0:coef_size])
    z_model = zernike_model(ref_nlm_array, q_array, rmax, nmax)
    out_name = codes[indx] + "_.qi"
    nn_array.load_coefs(nn_indx, nns[indx][0:nn_total])
    ref_int = put_intensity(z_model, q_array, nn_array, out_name)
    mean_r = ref_int * 0.0
    sig_r = ref_int * 0.0
    small_z_model = zernike_model(ref_nlm_array, small_q_array, rmax, nmax)
    small_ref_int = small_z_model.calc_intensity(nn_array)
    small_ref_int = small_ref_int / small_ref_int[0]
    N = 0.0
    for coef, ii in zip(moments, all_indices):
      if N > 25:
        break
      target_nlm_array.load_coefs(nlm, coef[0:coef_size])
      align_obj = fft_align.align(ref_nlm_array, target_nlm_array, nmax=nmax, topn=10, refine=False)
      cc = align_obj.get_cc()
      if (cc >= cc_min and cc <= cc_max):
        N += 1
        nn_array.load_coefs(nn_indx, nns[ii][0:nn_total])
        opt_r_obj = optimize_r(nn_array, small_ref_int, small_q_array, nmax)
        opt_r = gss(opt_r_obj.target, rmax * 0.8, rmax * 1.2)
        z_model = zernike_model(ref_nlm_array, q_array, opt_r, nmax)
        out_name = codes[indx] + "_" + codes[ii] + ".qi.rel"
        mod_int = put_intensity(z_model, q_array, nn_array, out_name, ref_int)
        out_name = codes[indx] + "_" + codes[ii] + ".qi"
        put_intensity(z_model, q_array, nn_array, out_name)
        mod_int = mod_int - 1.0
        mean_r += mod_int
        sig_r += mod_int * mod_int
        print ii, cc, codes[ii], opt_r
    if N > 3:
      mean_r /= N
      sig_r = sig_r / N - mean_r * mean_r
      mean.append(mean_r)
      sig.append(sig_r)

  N = len(mean)
  if N > 0:
    mean_r = mean[0] * 0.0
    s_r = mean[0] * 0.0
    for uu in range(N):
      mean_r += mean[uu]
      s_r += sig[uu]
    mean_r /= N
    s_r /= N
    s_r = flex.sqrt(s_r)
    f = open('q_m_s_%s.dat' % rmax, 'w')
    for q, m, s in zip(q_array, mean_r, s_r):
      print >> f, q, m, s
import os, sys
# NOTE: flex, the scitbx nlm/nl array types (math.nlm_array / math.nl_array)
# and zernike_model are imported elsewhere in the original module.

def set_default_db_path():
  import libtbx
  import libtbx.env_config
  env = libtbx.env_config.unpickle()
  sastbx_path = env.dist_path("sastbx")
  path = sastbx_path + '/database/'
  print "\nATTENTION: Database path was set to : >>%s<<" % path
  return path

prefix = set_default_db_path()
trivial_nlm_array = math.nlm_array(10)

class optimize_r(object):
  def __init__(self, nn_array, ref_int, q_array, nmax=10):
    self.nn_array = nn_array
    self.ref_int = ref_int
    self.q_array = q_array
    self.nmax = nmax

  def target(self, r):
    z_model = zernike_model(trivial_nlm_array, self.q_array, r, self.nmax)
    calc_i = z_model.calc_intensity(self.nn_array)
    calc_i = calc_i / calc_i[0]
    return flex.sum_sq(self.ref_int - calc_i)
def run(args):
  # filename = "res" + str(filenum) + ".txt"
  targetfile = os.path.join(os.path.split(sys.path[0])[0], "c5")
  with open(targetfile, "w") as f:
    f.truncate()
  tempf = open(targetfile, 'w')
  print args
  params = get_input(args, master_params, "aligndb", banner, help, tempf)
  tempf.close()
  if params is None:
    return
  fix = params.align.fix
  typef = params.align.typef
  mov = params.align.mov
  typem = params.align.typem
  num_grid = params.align.num_grid
  nmax = params.align.nmax
  rmax = params.align.rmax
  topn = params.align.topn
  write_map = params.align.write_map
  nlNum = params.align.nlnum
  nlmNum = params.align.nlmnum

  #fix_model = model_interface.build_model( fix, typef, nmax, rmax )
  mov_model = model_interface.build_model(mov, typem, nmax, rmax)

  # prefix = "/home/dongxq/align_code/dude-actives"
  prefix = "/home/dongxq/zalign/build/myDB"
  codes = easy_pickle.load(prefix + ".codes")
  nlm_coefs = easy_pickle.load(prefix + ".nlm")
  nl_coefs = easy_pickle.load(prefix + ".nl")
  rmaxs = easy_pickle.load(prefix + ".rmax")

  #compute distance
  nlRes = []
  mov_nl_array = mov_model.nl_array
  mov_nl_coefs = mov_model.nl_array.coefs()
  tnl1 = time.time()
  for indx in range(len(nl_coefs)):
    #compute Chi-square distance
    mf_coef = numpy.true_divide(nl_coefs[indx], mov_nl_coefs)
    dist = numpy.sum(numpy.square(mov_nl_coefs - mf_coef * nl_coefs[indx]))
    #compute Mahalanobis distance
    # dist = mol2.Mahalanobis(mov_nl_coefs, nl_coefs[indx])
    nlRes.append((indx, dist, codes[indx]))
  sortedNlRes = sorted(nlRes, key=operator.itemgetter(1), reverse=False)
  tnl2 = time.time()

  # compute nl_cc
  # nl_cc_res = []
  # mov_nl_array = mov_model.nl_array
  # mov_nl_coefs = mov_model.nl_array.coefs()
  # tnl1 = time.time()
  # for indx in range(len(nl_coefs)):
  #   nl_cc = pearson.pearson_cc(mov_nl_coefs, nl_coefs[indx])
  #   print nl_cc
  #   nl_cc_res.append((indx, nl_cc, codes[indx]))
  # sortedNlRes = sorted(nl_cc_res, key=operator.itemgetter(1), reverse=True)
  # tnl2 = time.time()

  #compute nlm_cc
  mov_nlm_array = mov_model.nlm_array
  fix_nlm_array = math.nlm_array(nmax)
  nlm = fix_nlm_array.nlm()
  nlm_total = fix_nlm_array.nlm().size()
  nlmRes = []
  tnlm1 = time.time()
  for i in range(nlNum):
    indx = sortedNlRes[i][0]
    fix = nlm_coefs[indx][0:nlm_total]
    fix_nlm_array.load_coefs(nlm, fix)
    align_obj = fft_align.align(fix_nlm_array, mov_nlm_array, nmax=nmax, refine=True)
    cc = align_obj.get_cc()
    nlmRes.append((indx, codes[indx], cc))
  sortedNlmRes = sorted(nlmRes, key=operator.itemgetter(2), reverse=True)
  sortedNlmRes = sortedNlmRes[:nlmNum]
  tnlm2 = time.time()

  #merge chi to cc arr
  tmerge1 = time.time()
  for i in range(nlmNum):
    indx = sortedNlmRes[i][0]
    chi = list(filter(lambda j: j[0] == indx, sortedNlRes[0:]))[0][1]
    sortedNlmRes[i] += (chi, )
  tmerge2 = time.time()
  print "merge time used: ", tmerge2 - tmerge1

  #output
  with open(targetfile, "w") as f:
    f.write("############# SUMMARY of ALIGNMENT #############\n")
    f.write("rank  indx  name  cc  chi-square\n")
    rank = 0
    for arr in sortedNlmRes:
      rank += 1
      arr = (rank, ) + arr
      f.write(str(arr) + "\n")
    t3 = time.time()
    f.write("rotation invariant computing time used: " + str(tnl2 - tnl1) + "\n")
    f.write("alignment computing time used: " + str(tnlm2 - tnlm1) + "\n")
    f.write("total time used: : " + str(t3 - t1))