def ali3d_MPI(stack, ref_vol, outdir, maskfile = None, ir = 1, ou = -1, rs = 1,
              xr = "4 2 2 1", yr = "-1", ts = "1 1 0.5 0.25", delta = "10 6 4 4", an = "-1",
              center = 0, maxit = 5, term = 95, CTF = False, fourvar = False, snr = 1.0, ref_a = "S", sym = "c1",
              sort=True, cutoff=999.99, pix_cutoff="0", two_tail=False, model_jump="1 1 1 1 1", restart=False, save_half=False,
              protos=None, oplane=None, lmask=-1, ilmask=-1, findseam=False, vertstep=None, hpars="-1", hsearch="73.0 170.0",
              full_output = False, compare_repro = False, compare_ref_free = "-1", ref_free_cutoff= "-1 -1 -1 -1",
              wcmask = None, debug = False, recon_pad = 4):
    """Run multi-reference 3D projection matching on an image stack under MPI.

    Every MPI rank loads its own slice of the active particles in ``stack``,
    aligns them against reference rings prepared from each volume found in
    ``ref_vol`` and takes part in the reconstruction of new volumes through
    ``rec3D_MPI_noCTF``.  The main node (rank 0) additionally creates
    ``outdir``, writes the log, optionally applies helical processing,
    passes each volume through ``ref_ali3d`` and broadcasts the result so all
    ranks use it in the next iteration.

    Parameters described as "list string" are parsed with
    ``get_input_from_string`` (the defaults are space-separated numbers).

    Parameters
    ----------
    stack : name of the particle stack; the 'active' header attribute selects
        which images are used.
    ref_vol : file holding one or more reference volumes (``nrefs`` of them).
    outdir : output directory; it must not exist yet and is created by the
        main node.
    maskfile : optional 3D mask (file name or image object).  When the file
        holds more than one image, mask ``iref`` is handed to ``ref_ali3d``
        for model ``iref``.
    ir, ou, rs : first ring, last ring and ring step.  A negative or too
        large ``ou`` is replaced by ``nx/2 - 2`` for the ring setup; the raw
        ``ou`` value is what is passed to the helical mask helpers.
    xr, yr, ts, delta, an : list strings giving, per step, the x/y search
        range, translational step, angular step and angular search range.
        ``yr == "-1"`` reuses ``xr``; ``an == "-1"`` selects
        ``proj_ali_incore`` instead of ``proj_ali_incore_local``.
    center : ``-1`` runs ``estimate_3D_center_MPI`` / ``rotate_3D_shift``;
        ``max(center, 0)`` is forwarded to ``ref_ali3d``.
    maxit : maximum number of inner iterations per step.
    term : percentage of particles with pixel error below 1 above which the
        inner loop of a step stops.
    CTF : when true, particles are processed once with
        ``filt_ctf(..., sign=-1, binary=1)`` before alignment.
    fourvar : when true, ``varf3d_MPI`` is run and its inverse is written
        out and handed to ``ref_ali3d``.
    snr : only written to the log in this function.
    ref_a, sym : forwarded to ``prepare_refrings`` (``sym`` also to the
        reconstruction and variance routines).
    sort : when true, each particle is assigned to the best-scoring model
        ('group' attribute) and each model is reconstructed from its group;
        otherwise every particle gets group 999.
    cutoff : number of standard deviations around the mean score used for
        the cross-correlation cutoff (group 888).
    pix_cutoff : list string of per-step flags enabling the pixel-error
        cutoff (pix_score/group 777); padded with its last value.
    two_tail : also reject scores above ``mean + cutoff * std``.
    model_jump : list string of per-step flags; the group is only updated in
        steps whose flag equals 1.
    restart : rebuild the starting volumes from the alignment parameters
        stored in the image headers before iterating.
    save_half : also write the odd/even half volumes.
    protos : list string with one protofilament number per reference.
    oplane : out-of-plane limit in degrees; restricts theta to
        ``90 - oplane`` .. ``90 + oplane``.  Defaults to 5.0 when any helical
        option is set.
    lmask, ilmask : forwarded to ``createCylMask`` / ``createBoxMask``.
    findseam, vertstep, wcmask : forwarded to the helical helpers
        (``wcmask`` must hold 3 values: x y radius).
    hpars : list string with two helical parameters per reference.
    hsearch : list string with exactly two values for the helical search.
    full_output : write a ``paramout_<iter>`` text file after every
        iteration.
    compare_repro, compare_ref_free, ref_free_cutoff : enable ``comp_rep``
        and ``compare``; particles returned by ``compare`` are put into
        group 666.
    debug : write a per-rank ``progress%04d`` file in ``outdir``.
    recon_pad : ``npad`` value for ``rec3D_MPI_noCTF``.

    Returns
    -------
    None.  Results are written to ``outdir`` and alignment attributes are
    sent to the main node with ``send_attr_dict`` / ``recv_attr_dict``.

    Notes
    -----
    The following names are used but not imported inside this function and
    must be supplied by the enclosing module: ``sys``, ``ERROR``, ``EMData``,
    ``EMUtil``, ``Transform``, ``Util``, ``MPI_start_end``,
    ``proj_ali_incore``, ``proj_ali_incore_local``, ``ref_ali3d``,
    ``legibleTime``, ``comp_rep`` and ``compare``.
    """
    # NOTE(review): the statement nesting below was reconstructed from a
    # whitespace-collapsed copy of this function -- confirm the indentation of
    # the blocks marked NOTE(review) against the original file.
    from alignment import Numrinit, prepare_refrings
    from utilities import model_circle, get_image, drop_image, get_input_from_string
    from utilities import bcast_list_to_all, bcast_number_to_all, reduce_EMData_to_root, bcast_EMData_to_all
    from utilities import send_attr_dict
    from utilities import get_params_proj, file_type
    from fundamentals import rot_avg_image
    import os
    import types
    from utilities import print_begin_msg, print_end_msg, print_msg
    from mpi import mpi_bcast, mpi_comm_size, mpi_comm_rank, MPI_FLOAT, MPI_COMM_WORLD, mpi_barrier, mpi_reduce
    from mpi import mpi_reduce, MPI_INT, MPI_SUM, mpi_finalize
    from filter import filt_ctf
    from projection import prep_vol, prgs
    from statistics import hist_list, varf3d_MPI, fsc_mask
    from numpy import array, bincount, array2string, ones

    number_of_proc = mpi_comm_size(MPI_COMM_WORLD)
    myid = mpi_comm_rank(MPI_COMM_WORLD)
    main_node = 0

    # Only the main node creates the output directory; the barrier keeps the
    # other ranks from running ahead of it.
    if myid == main_node:
        if os.path.exists(outdir): ERROR('Output directory exists, please change the name and restart the program', "ali3d_MPI", 1)
        os.mkdir(outdir)
    mpi_barrier(MPI_COMM_WORLD)

    if debug:
        from time import sleep
        # Wait until the directory created by the main node is visible here.
        while not os.path.exists(outdir):
            print "Node ",myid," waiting..."
            sleep(5)
        info_file = os.path.join(outdir, "progress%04d"%myid)
        finfo = open(info_file, 'w')
    else:
        finfo = None

    # Parse the per-step parameter strings.
    mjump = get_input_from_string(model_jump)
    xrng = get_input_from_string(xr)
    if yr == "-1": yrng = xrng
    else : yrng = get_input_from_string(yr)
    step = get_input_from_string(ts)
    delta = get_input_from_string(delta)
    ref_free_cutoff = get_input_from_string(ref_free_cutoff)
    pix_cutoff = get_input_from_string(pix_cutoff)

    # Number of outer steps = length of the shortest per-step list.
    lstp = min(len(xrng), len(yrng), len(step), len(delta))
    if an == "-1":
        an = [-1] * lstp
    else:
        an = get_input_from_string(an)
    # make sure pix_cutoff is set for all iterations
    if len(pix_cutoff)<lstp:
        for i in xrange(len(pix_cutoff),lstp):
            pix_cutoff.append(pix_cutoff[-1])
    # don't waste time on sub-pixel alignment for low-resolution ang incr
    for i in range(len(step)):
        if (delta[i] > 4 or delta[i] == -1) and step[i] < 1:
            step[i] = 1

    first_ring = int(ir)
    rstep = int(rs)
    last_ring = int(ou)
    max_iter = int(maxit)
    center = int(center)

    nrefs = EMUtil.get_image_count( ref_vol )
    nmasks = 0
    if maskfile:
        # read number of masks within each maskfile (mc)
        nmasks = EMUtil.get_image_count( maskfile )
        # open masks within maskfile (mc)
        maskF = EMData.read_images(maskfile, xrange(nmasks))
    vol = EMData.read_images(ref_vol, xrange(nrefs))
    nx = vol[0].get_xsize()

    ## make sure box sizes are the same
    if myid == main_node:
        im=EMData.read_images(stack,[0])
        bx = im[0].get_xsize()
        if bx!=nx:
            print_msg("Error: Stack box size (%i) differs from initial model (%i)\n"%(bx,nx))
            sys.exit()
        del im,bx

    # for helical processing:
    helicalrecon = False
    if protos is not None or hpars != "-1" or findseam is True:
        helicalrecon = True
        # if no out-of-plane param set, use 5 degrees
        if oplane is None:
            oplane=5.0
    if protos is not None:
        proto = get_input_from_string(protos)
        if len(proto) != nrefs:
            print_msg("Error: insufficient protofilament numbers supplied")
            sys.exit()
    if hpars != "-1":
        hpars = get_input_from_string(hpars)
        if len(hpars) != 2*nrefs:
            print_msg("Error: insufficient helical parameters supplied")
            sys.exit()
    ## create helical parameter file for helical reconstruction
    # NOTE(review): `proto` is only bound when `protos` is given, but it is
    # read here whenever helical processing is on (e.g. only `hpars` or
    # `findseam` supplied) -- confirm callers always pass `protos`.
    if helicalrecon is True and myid == main_node:
        from hfunctions import createHpar
        # create initial helical parameter files
        dp=[0]*nrefs
        dphi=[0]*nrefs
        vdp=[0]*nrefs
        vdphi=[0]*nrefs
        for iref in xrange(nrefs):
            hpar = os.path.join(outdir,"hpar%02d.spi"%(iref))
            params = False
            if hpars != "-1":
                # if helical parameters explicitly given, set twist & rise
                params = [float(hpars[iref*2]),float(hpars[(iref*2)+1])]
            dp[iref],dphi[iref],vdp[iref],vdphi[iref] = createHpar(hpar,proto[iref],params,vertstep)

    # get values for helical search parameters
    hsearch = get_input_from_string(hsearch)
    if len(hsearch) != 2:
        print_msg("Error: specify outer and inner radii for helical search")
        sys.exit()

    # Fall back to the largest ring allowed by the box size.
    if last_ring < 0 or last_ring > int(nx/2)-2 : last_ring = int(nx/2) - 2

    # Log the run parameters (main node only).
    if myid == main_node:
        # import user_functions
        # user_func = user_functions.factory[user_func_name]
        print_begin_msg("ali3d_MPI")
        print_msg("Input stack : %s\n"%(stack))
        print_msg("Reference volume : %s\n"%(ref_vol))
        print_msg("Output directory : %s\n"%(outdir))
        if nmasks > 0:
            print_msg("Maskfile (number of masks) : %s (%i)\n"%(maskfile,nmasks))
        print_msg("Inner radius : %i\n"%(first_ring))
        print_msg("Outer radius : %i\n"%(last_ring))
        print_msg("Ring step : %i\n"%(rstep))
        print_msg("X search range : %s\n"%(xrng))
        print_msg("Y search range : %s\n"%(yrng))
        print_msg("Translational step : %s\n"%(step))
        print_msg("Angular step : %s\n"%(delta))
        print_msg("Angular search range : %s\n"%(an))
        print_msg("Maximum iteration : %i\n"%(max_iter))
        print_msg("Center type : %i\n"%(center))
        print_msg("CTF correction : %s\n"%(CTF))
        print_msg("Signal-to-Noise Ratio : %f\n"%(snr))
        print_msg("Reference projection method : %s\n"%(ref_a))
        print_msg("Symmetry group : %s\n"%(sym))
        print_msg("Fourier padding for 3D : %i\n"%(recon_pad))
        print_msg("Number of reference models : %i\n"%(nrefs))
        print_msg("Sort images between models : %s\n"%(sort))
        print_msg("Allow images to jump : %s\n"%(mjump))
        print_msg("CC cutoff standard dev : %f\n"%(cutoff))
        print_msg("Two tail cutoff : %s\n"%(two_tail))
        print_msg("Termination pix error : %f\n"%(term))
        print_msg("Pixel error cutoff : %s\n"%(pix_cutoff))
        print_msg("Restart : %s\n"%(restart))
        print_msg("Full output : %s\n"%(full_output))
        print_msg("Compare reprojections : %s\n"%(compare_repro))
        print_msg("Compare ref free class avgs : %s\n"%(compare_ref_free))
        print_msg("Use cutoff from ref free : %s\n"%(ref_free_cutoff))
        # NOTE(review): whether the search-range line belongs inside this
        # `if protos` block is reconstructed -- confirm.
        if protos:
            print_msg("Protofilament numbers : %s\n"%(proto))
            print_msg("Using helical search range : %s\n"%hsearch)
        if findseam is True:
            print_msg("Using seam-based reconstruction\n")
        if hpars != "-1":
            print_msg("Using hpars : %s\n"%hpars)
        if vertstep != None:
            print_msg("Using vertical step : %.2f\n"%vertstep)
        if save_half is True:
            print_msg("Saving even/odd halves\n")
        for i in xrange(100) : print_msg("*")
        print_msg("\n\n")

    # 3D mask: the user-supplied one (file name or image object) or a sphere
    # of radius last_ring.
    if maskfile:
        if type(maskfile) is types.StringType: mask3D = get_image(maskfile)
        else: mask3D = maskfile
    else: mask3D = model_circle(last_ring, nx, nx, nx)

    numr = Numrinit(first_ring, last_ring, rstep, "F")
    # 2D annulus between first_ring and last_ring, used for the statistics
    # taken before CTF processing below.
    mask2D = model_circle(last_ring,nx,nx) - model_circle(first_ring,nx,nx)
    fscmask = model_circle(last_ring,nx,nx,nx)

    # NOTE(review): rec3D_MPI_noCTF is called regardless of CTF, so its import
    # is placed outside the `if CTF` block here -- confirm against the
    # original layout.
    if CTF:
        from filter import filt_ctf
    from reconstruction_rjh import rec3D_MPI_noCTF

    # The main node builds the list of active particles and shares it.
    if myid == main_node:
        active = EMUtil.get_all_attributes(stack, 'active')
        list_of_particles = []
        for im in xrange(len(active)):
            if active[im]: list_of_particles.append(im)
        del active
        nima = len(list_of_particles)
    else:
        nima = 0
    total_nima = bcast_number_to_all(nima, source_node = main_node)

    if myid != main_node:
        list_of_particles = [-1]*total_nima
    list_of_particles = bcast_list_to_all(list_of_particles, source_node = main_node)

    image_start, image_end = MPI_start_end(total_nima, number_of_proc, myid)
    # create a list of images for each node
    list_of_particles = list_of_particles[image_start: image_end]
    nima = len(list_of_particles)
    if debug:
        finfo.write("image_start, image_end: %d %d\n" %(image_start, image_end))
        finfo.flush()

    data = EMData.read_images(stack, list_of_particles)

    # transmulti[im][iref]: current projection transform of local image `im`
    # with respect to model `iref`, initialised to the zero transform.
    t_zero = Transform({"type":"spider","phi":0,"theta":0,"psi":0,"tx":0,"ty":0})
    transmulti = [[t_zero for i in xrange(nrefs)] for j in xrange(nima)]

    for iref,im in ((iref,im) for iref in xrange(nrefs) for im in xrange(nima)):
        if nrefs == 1:
            transmulti[im][iref] = data[im].get_attr("xform.projection")
        else:
            # if multi models, keep track of eulers for all models
            try:
                transmulti[im][iref] = data[im].get_attr("eulers_txty.%i"%iref)
            except:
                # Attribute missing: store the zero transform in the header.
                data[im].set_attr("eulers_txty.%i"%iref,t_zero)

    # Per-image, per-model score and pixel error of the latest alignment.
    scoremulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)]
    pixelmulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)]
    ref_res = [0.0 for x in xrange(nrefs)]  # not read again in this function
    apix = data[0].get_attr('apix_x')

    # for oplane parameter, create cylindrical mask
    # Only the main node replaces mask3D; mask3D is later read in main-node
    # branches only (ref_data, fsc_mask).
    if oplane is not None and myid == main_node:
        from hfunctions import createCylMask
        cmaskf=os.path.join(outdir, "mask3D_cyl.mrc")
        mask3D = createCylMask(data,ou,lmask,ilmask,cmaskf)
        # NOTE(review): nesting of the seam / wcmask handling under this
        # main-node branch is reconstructed -- confirm.
        # if finding seam of helix, create wedge masks
        if findseam is True:
            wedgemask=[]  # filled with empty images; not read again in this function
            for pf in xrange(nrefs):
                wedgemask.append(EMData())
            # wedgemask option
            if wcmask is not None:
                wcmask = get_input_from_string(wcmask)
                if len(wcmask) != 3:
                    print_msg("Error: wcmask option requires 3 values: x y radius")
                    sys.exit()

    # determine if particles have helix info:
    # A missing 'h_angle' attribute on the first image disables box masking.
    try:
        data[0].get_attr('h_angle')
        original_data = []
        boxmask = True
        from hfunctions import createBoxMask
    except:
        boxmask = False

    # prepare particles
    for im in xrange(nima):
        data[im].set_attr('ID', list_of_particles[im])
        data[im].set_attr('pix_score', int(0))
        if CTF:
            # only phaseflip particles, not full CTF correction
            ctf_params = data[im].get_attr("ctf")
            st = Util.infomask(data[im], mask2D, False)
            # Subtract the first statistic returned for the annulus
            # (presumably the mean -- confirm) before filtering.
            data[im] -= st[0]
            data[im] = filt_ctf(data[im], ctf_params, sign = -1, binary=1)
            data[im].set_attr('ctf_applied', 1)
        # for window mask:
        if boxmask is True:
            h_angle = data[im].get_attr("h_angle")
            # Unmasked copy is kept, but not read again in this function.
            original_data.append(data[im].copy())
            bmask = createBoxMask(nx,apix,ou,lmask,h_angle)
            data[im]*=bmask
            del bmask
    if debug:
        finfo.write( '%d loaded \n' % nima )
        finfo.flush()
    if myid == main_node:
        # initialize data for the reference preparation function
        ref_data = [ mask3D, max(center,0), None, None, None, None ]
        # for method -1, switch off centering in user function

    from time import time

    # this is needed for gathering of pixel errors
    # recvcount / disps: number of images per rank and their offsets;
    # the *_score variants are scaled by nrefs for the flattened
    # per-image-per-model lists gathered below.
    disps = []
    recvcount = []
    disps_score = []
    recvcount_score = []
    for im in xrange(number_of_proc):
        if( im == main_node ):
            disps.append(0)
            disps_score.append(0)
        else:
            disps.append(disps[im-1] + recvcount[im-1])
            disps_score.append(disps_score[im-1] + recvcount_score[im-1])
        ib, ie = MPI_start_end(total_nima, number_of_proc, im)
        recvcount.append( ie - ib )
        recvcount_score.append((ie-ib)*nrefs)

    pixer = [0.0]*nima
    cs = [0.0]*3
    total_iter = 0
    # Containers for the half-set volumes; overwritten by each reconstruction.
    volodd = EMData.read_images(ref_vol, xrange(nrefs))
    voleve = EMData.read_images(ref_vol, xrange(nrefs))

    if restart:
        # recreate initial volumes from alignments stored in header
        itout = "000_00"
        for iref in xrange(nrefs):
            if(nrefs == 1):
                modout = ""
            else:
                modout = "_model_%02d"%(iref)

            if(sort):
                # Reconstruct model iref from the particles of group iref.
                group = iref
                for im in xrange(nima):
                    imgroup = data[im].get_attr('group')
                    if imgroup == iref:
                        data[im].set_attr('xform.projection',transmulti[im][iref])
            else:
                group = int(999)
                for im in xrange(nima):
                    data[im].set_attr('xform.projection',transmulti[im][iref])

            fscfile = os.path.join(outdir, "fsc_%s%s"%(itout,modout))
            vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(data, sym, fscmask, fscfile, myid, main_node, index = group, npad = recon_pad)

            if myid == main_node:
                if helicalrecon:
                    from hfunctions import processHelicalVol
                    vstep=None
                    if vertstep is not None:
                        vstep=(vdp[iref],vdphi[iref])
                    print_msg("Old rise and twist for model %i : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
                    hvals=processHelicalVol(vol[iref],voleve[iref],volodd[iref],iref,outdir,itout, dp[iref],dphi[iref],apix,hsearch,findseam,vstep,wcmask)
                    (vol[iref],voleve[iref],volodd[iref],dp[iref],dphi[iref],vdp[iref],vdphi[iref])=hvals
                    print_msg("New rise and twist for model %i : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
                    # get new FSC from symmetrized half volumes
                    fscc = fsc_mask( volodd[iref], voleve[iref], mask3D, rstep, fscfile)
                else:
                    vol[iref].write_image(os.path.join(outdir, "vol_%s.hdf"%itout),-1)

                if save_half is True:
                    volodd[iref].write_image(os.path.join(outdir, "volodd_%s.hdf"%itout),-1)
                    voleve[iref].write_image(os.path.join(outdir, "voleve_%s.hdf"%itout),-1)

                if nmasks > 1:
                    # Read mask for multiplying
                    ref_data[0] = maskF[iref]
                ref_data[2] = vol[iref]
                ref_data[3] = fscc
                # call user-supplied function to prepare reference image, i.e., center and filter it
                vol[iref], cs,fl = ref_ali3d(ref_data)
                vol[iref].write_image(os.path.join(outdir, "volf_%s.hdf"%(itout)),-1)
                if (apix == 1):
                    res_msg = "Models filtered at spatial frequency of:\t"
                    res = fl
                else:
                    res_msg = "Models filtered at resolution of: \t"
                    res = apix / fl
                ares = array2string(array(res), precision = 2)
                print_msg("%s%s\n\n"%(res_msg,ares))

            bcast_EMData_to_all(vol[iref], myid, main_node)
            # write out headers, under MPI writing has to be done sequentially
            mpi_barrier(MPI_COMM_WORLD)

    # projection matching
    for N_step in xrange(lstp):
        terminate = 0
        Iter = -1
        while(Iter < max_iter-1 and terminate == 0):
            Iter += 1
            total_iter += 1
            # Label used in every output file name of this iteration.
            itout = "%03g_%02d" %(delta[N_step], Iter)
            if myid == main_node:
                print_msg("ITERATION #%3d, inner iteration #%3d\nDelta = %4.1f, an = %5.2f, xrange = %5.2f, yrange = %5.2f, step = %5.2f\n\n"%(N_step, Iter, delta[N_step], an[N_step], xrng[N_step],yrng[N_step],step[N_step]))

            # --- align every local image against every model ---
            for iref in xrange(nrefs):
                if myid == main_node: start_time = time()
                volft,kb = prep_vol( vol[iref] )

                ## constrain projections to out of plane parameter
                theta1 = None
                theta2 = None
                if oplane is not None:
                    theta1 = 90-oplane
                    theta2 = 90+oplane
                refrings = prepare_refrings( volft, kb, nx, delta[N_step], ref_a, sym, numr, MPI=True, phiEqpsi = "Minus", initial_theta=theta1, delta_theta=theta2)
                del volft,kb

                if myid== main_node:
                    print_msg( "Time to prepare projections for model %i: %s\n" % (iref, legibleTime(time()-start_time)) )
                    start_time = time()

                for im in xrange( nima ):
                    # Start from the parameters last found for this model.
                    data[im].set_attr("xform.projection", transmulti[im][iref])
                    if an[N_step] == -1:
                        t1, peak, pixer[im] = proj_ali_incore(data[im],refrings,numr,xrng[N_step],yrng[N_step],step[N_step],finfo)
                    else:
                        t1, peak, pixer[im] = proj_ali_incore_local(data[im],refrings,numr,xrng[N_step],yrng[N_step],step[N_step],an[N_step],finfo)
                    #data[im].set_attr("xform.projection"%iref, t1)
                    if nrefs > 1: data[im].set_attr("eulers_txty.%i"%iref,t1)
                    scoremulti[im][iref] = peak
                    from pixel_error import max_3D_pixel_error
                    # t1 is the current param, t2 is old
                    t2 = transmulti[im][iref]
                    # numr[-3] is presumably the outermost ring radius -- confirm.
                    pixelmulti[im][iref] = max_3D_pixel_error(t1,t2,numr[-3])
                    transmulti[im][iref] = t1

                if myid == main_node:
                    print_msg("Time of alignment for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
                    start_time = time()

            # gather scoring data from all processors
            from mpi import mpi_gatherv
            # sum(list_of_lists, []) flattens the per-image rows into one list.
            scoremultisend = sum(scoremulti,[])
            pixelmultisend = sum(pixelmulti,[])
            tmp = mpi_gatherv(scoremultisend,len(scoremultisend),MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node,MPI_COMM_WORLD)
            tmp1 = mpi_gatherv(pixelmultisend,len(pixelmultisend),MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node,MPI_COMM_WORLD)
            # Share the gathered values with every rank (root is literal 0).
            tmp = mpi_bcast(tmp,(total_nima * nrefs), MPI_FLOAT,0, MPI_COMM_WORLD)
            tmp1 = mpi_bcast(tmp1,(total_nima * nrefs), MPI_FLOAT,0, MPI_COMM_WORLD)
            tmp = map(float,tmp)
            tmp1 = map(float,tmp1)
            # Global tables with one row per image and one column per model.
            score = array(tmp).reshape(-1,nrefs)
            pixelerror = array(tmp1).reshape(-1,nrefs)
            score_local = array(scoremulti)
            mean_score = score.mean(axis=0)
            std_score = score.std(axis=0)
            # Per-model lower/upper score limits for the CC cutoff.
            cut = mean_score - (cutoff * std_score)
            cut2 = mean_score + (cutoff * std_score)
            # Best-scoring model for every local image.
            res_max = score_local.argmax(axis=1)
            # Per-model counters of local particles removed by each criterion.
            minus_cc = [0.0 for x in xrange(nrefs)]
            minus_pix = [0.0 for x in xrange(nrefs)]
            minus_ref = [0.0 for x in xrange(nrefs)]

            #output pixel errors
            if(myid == main_node):
                from statistics import hist_list
                lhist = 20
                # Smallest pixel error over all models for each image.
                pixmin = pixelerror.min(axis=1)
                region, histo = hist_list(pixmin, lhist)
                if(region[0] < 0.0): region[0] = 0.0
                print_msg("Histogram of pixel errors\n ERROR number of particles\n")
                for lhx in xrange(lhist):
                    print_msg(" %10.3f %7d\n"%(region[lhx], histo[lhx]))
                # Terminate if more than `term` percent of the particles fall
                # in histogram bins whose `region` value is not above 1 pixel
                im = 0
                for lhx in xrange(lhist):
                    if(region[lhx] > 1.0): break
                    im += histo[lhx]
                print_msg( "Percent of particles with pixel error < 1: %f\n\n"% (im/float(total_nima)*100))
                term_cond = float(term)/100
                if(im/float(total_nima) > term_cond):
                    terminate = 1
                    print_msg("Terminating internal loop\n")
                del region, histo
            # Every rank must agree on whether the inner loop ends.
            terminate = mpi_bcast(terminate, 1, MPI_INT, 0, MPI_COMM_WORLD)
            terminate = int(terminate[0])

            # --- assign each local image to a group ---
            # Group codes written below: model index, 999 (no sorting),
            # 888 (CC cutoff), 777 (pixel-error cutoff), 666 (ref-free reject).
            for im in xrange(nima):
                if(sort==False):
                    data[im].set_attr('group',999)
                elif (mjump[N_step]==1):
                    data[im].set_attr('group',int(res_max[im]))

                # pix_run is read before pix_score may be updated just below,
                # so a particle flagged here is only moved to group 777 on a
                # later pass through this loop.
                pix_run = data[im].get_attr('pix_score')
                if (pix_cutoff[N_step]==1 and (terminate==1 or Iter == max_iter-1)):
                    if (pixelmulti[im][int(res_max[im])] > 1):
                        data[im].set_attr('pix_score',int(777))

                if (score_local[im][int(res_max[im])]<cut[int(res_max[im])]) or (two_tail and score_local[im][int(res_max[im])]>cut2[int(res_max[im])]):
                    data[im].set_attr('group',int(888))
                    minus_cc[int(res_max[im])] = minus_cc[int(res_max[im])] + 1

                if(pix_run == 777):
                    data[im].set_attr('group',int(777))
                    minus_pix[int(res_max[im])] = minus_pix[int(res_max[im])] + 1

                # `rejects` is produced by compare() at the end of a previous
                # iteration, hence the total_iter > 1 guard.
                if (compare_ref_free != "-1") and (ref_free_cutoff[N_step] != -1) and (total_iter > 1):
                    id = data[im].get_attr('ID')
                    if id in rejects:
                        data[im].set_attr('group',int(666))
                        minus_ref[int(res_max[im])] = minus_ref[int(res_max[im])] + 1

            # Sum the rejection counters over all ranks onto rank 0.
            minus_cc_tot = mpi_reduce(minus_cc,nrefs,MPI_FLOAT,MPI_SUM,0,MPI_COMM_WORLD)
            minus_pix_tot = mpi_reduce(minus_pix,nrefs,MPI_FLOAT,MPI_SUM,0,MPI_COMM_WORLD)
            minus_ref_tot = mpi_reduce(minus_ref,nrefs,MPI_FLOAT,MPI_SUM,0,MPI_COMM_WORLD)
            if (myid == main_node):
                if(sort):
                    # Count how many images score best for each model.
                    tot_max = score.argmax(axis=1)
                    res = bincount(tot_max)
                else:
                    res = ones(nrefs) * total_nima
                print_msg("Particle distribution: \t\t%s\n"%(res*1.0))
                afcut1 = res - minus_cc_tot
                afcut2 = afcut1 - minus_pix_tot
                afcut3 = afcut2 - minus_ref_tot
                print_msg("Particle distribution after cc cutoff:\t\t%s\n"%(afcut1))
                print_msg("Particle distribution after pix cutoff:\t\t%s\n"%(afcut2))
                print_msg("Particle distribution after ref cutoff:\t\t%s\n\n"%(afcut3))

            # --- reconstruct and post-process every model ---
            # `res` is reused from here on to hold the filter value per model.
            res = [0.0 for i in xrange(nrefs)]
            for iref in xrange(nrefs):
                if(center == -1):
                    from utilities import estimate_3D_center_MPI, rotate_3D_shift
                    dummy=EMData()
                    cs[0], cs[1], cs[2], dummy, dummy = estimate_3D_center_MPI(data, total_nima, myid, number_of_proc, main_node)
                    cs = mpi_bcast(cs, 3, MPI_FLOAT, main_node, MPI_COMM_WORLD)
                    # Shift the data by the negated centre estimate.
                    cs = [-float(cs[0]), -float(cs[1]), -float(cs[2])]
                    rotate_3D_shift(data, cs)

                if(sort):
                    group = iref
                    for im in xrange(nima):
                        imgroup = data[im].get_attr('group')
                        if imgroup == iref:
                            data[im].set_attr('xform.projection',transmulti[im][iref])
                else:
                    group = int(999)
                    for im in xrange(nima):
                        data[im].set_attr('xform.projection',transmulti[im][iref])
                if(nrefs == 1):
                    modout = ""
                else:
                    modout = "_model_%02d"%(iref)

                fscfile = os.path.join(outdir, "fsc_%s%s"%(itout,modout))
                vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(data, sym, fscmask, fscfile, myid, main_node, index=group, npad=recon_pad)

                if myid == main_node:
                    print_msg("3D reconstruction time for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
                    start_time = time()

                # Compute Fourier variance
                if fourvar:
                    outvar = os.path.join(outdir, "volVar_%s.hdf"%(itout))
                    ssnr_file = os.path.join(outdir, "ssnr_%s"%(itout))
                    varf = varf3d_MPI(data, ssnr_text_file=ssnr_file, mask2D=None, reference_structure=vol[iref], ou=last_ring, rw=1.0, npad=1, CTF=None, sign=1, sym=sym, myid=myid)
                    if myid == main_node:
                        print_msg("Time to calculate 3D Fourier variance for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
                        start_time = time()
                        # The inverse of the variance is what gets stored and
                        # passed on to ref_ali3d.
                        varf = 1.0/varf
                        varf.write_image(outvar,-1)
                else: varf = None

                if myid == main_node:
                    if helicalrecon:
                        from hfunctions import processHelicalVol
                        vstep=None
                        if vertstep is not None:
                            vstep=(vdp[iref],vdphi[iref])
                        print_msg("Old rise and twist for model %i : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
                        hvals=processHelicalVol(vol[iref],voleve[iref],volodd[iref],iref,outdir,itout, dp[iref],dphi[iref],apix,hsearch,findseam,vstep,wcmask)
                        (vol[iref],voleve[iref],volodd[iref],dp[iref],dphi[iref],vdp[iref],vdphi[iref])=hvals
                        print_msg("New rise and twist for model %i : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
                        # get new FSC from symmetrized half volumes
                        fscc = fsc_mask( volodd[iref], voleve[iref], mask3D, rstep, fscfile)
                        print_msg("Time to search and apply helical symmetry for model %i: %s\n\n"%(iref, legibleTime(time()-start_time)))
                        start_time = time()
                    else:
                        vol[iref].write_image(os.path.join(outdir, "vol_%s.hdf"%(itout)),-1)

                    if save_half is True:
                        volodd[iref].write_image(os.path.join(outdir, "volodd_%s.hdf"%(itout)),-1)
                        voleve[iref].write_image(os.path.join(outdir, "voleve_%s.hdf"%(itout)),-1)

                    if nmasks > 1:
                        # Read mask for multiplying
                        ref_data[0] = maskF[iref]
                    ref_data[2] = vol[iref]
                    ref_data[3] = fscc
                    ref_data[4] = varf
                    # call user-supplied function to prepare reference image, i.e., center and filter it
                    vol[iref], cs,fl = ref_ali3d(ref_data)
                    vol[iref].write_image(os.path.join(outdir, "volf_%s.hdf"%(itout)),-1)
                    if (apix == 1):
                        res_msg = "Models filtered at spatial frequency of:\t"
                        res[iref] = fl
                    else:
                        res_msg = "Models filtered at resolution of: \t"
                        res[iref] = apix / fl

                del varf
                # All ranks need the prepared reference for the next pass.
                bcast_EMData_to_all(vol[iref], myid, main_node)

                # The reference-free comparison needs the reprojections, so
                # it forces compare_repro on.
                if compare_ref_free != "-1": compare_repro = True
                if compare_repro:
                    outfile_repro = comp_rep(refrings, data, itout, modout, vol[iref], group, nima, nx, myid, main_node, outdir)
                    mpi_barrier(MPI_COMM_WORLD)
                    if compare_ref_free != "-1":
                        ref_free_output = os.path.join(outdir,"ref_free_%s%s"%(itout,modout))
                        rejects = compare(compare_ref_free, outfile_repro,ref_free_output,yrng[N_step], xrng[N_step], rstep,nx,apix,ref_free_cutoff[N_step], number_of_proc, myid, main_node)

            # retrieve alignment params from all processors
            par_str = ['xform.projection','ID','group']
            if nrefs > 1:
                for iref in xrange(nrefs):
                    par_str.append('eulers_txty.%i'%iref)

            if myid == main_node:
                from utilities import recv_attr_dict
                recv_attr_dict(main_node, stack, data, par_str, image_start, image_end, number_of_proc)
            else: send_attr_dict(main_node, data, par_str, image_start, image_end)

            if myid == main_node:
                ares = array2string(array(res), precision = 2)
                print_msg("%s%s\n\n"%(res_msg,ares))
                dummy = EMData()
                if full_output:
                    # One line per image of the whole stack: EMAN-convention
                    # angles and shifts followed by the group number.
                    nimat = EMUtil.get_image_count(stack)
                    output_file = os.path.join(outdir, "paramout_%s"%itout)
                    foutput = open(output_file, 'w')
                    for im in xrange(nimat):
                        # save the parameters for each of the models
                        outstring = ""
                        dummy.read_image(stack,im,True)
                        param3d = dummy.get_attr('xform.projection')
                        g = dummy.get_attr("group")
                        # retrieve alignments in EMAN-format
                        pE = param3d.get_params('eman')
                        outstring += "%f\t%f\t%f\t%f\t%f\t%i\n" %(pE["az"], pE["alt"], pE["phi"], pE["tx"], pE["ty"],g)
                        foutput.write(outstring)
                    foutput.close()
                del dummy
            mpi_barrier(MPI_COMM_WORLD)

    # mpi_finalize()
    if myid == main_node: print_end_msg("ali3d_MPI")
def main(): progname = os.path.basename(sys.argv[0]) usage = progname + """ input_micrograph_list_file input_micrograph_pattern input_coordinates_pattern output_directory --coordinates_format --box_size=box_size --invert --import_ctf=ctf_file --limit_ctf --resample_ratio=resample_ratio --defocus_error=defocus_error --astigmatism_error=astigmatism_error Window particles from micrographs in input list file. The coordinates of the particles should be given as input. Please specify name pattern of input micrographs and coordinates files with a wild card (*). Use the wild card to indicate the place of micrograph ID (e.g. serial number, time stamp, and etc). The name patterns must be enclosed by single quotes (') or double quotes ("). (Note: sxgui.py automatically adds single quotes (')). BDB files can not be selected as input micrographs. sxwindow.py mic_list.txt ./mic*.hdf info/mic*_info.json particles --coordinates_format=eman2 --box_size=64 --invert --import_ctf=outdir_cter/partres/partres.txt If micrograph list file name is not provided, all files matched with the micrograph name pattern will be processed. sxwindow.py ./mic*.hdf info/mic*_info.json particles --coordinates_format=eman2 --box_size=64 --invert --import_ctf=outdir_cter/partres/partres.txt """ parser = OptionParser(usage, version=SPARXVERSION) parser.add_option( "--coordinates_format", type="string", default="eman1", help= "format of input coordinates files: 'sparx', 'eman1', 'eman2', or 'spider'. the coordinates of sparx, eman2, and spider format is particle center. the coordinates of eman1 format is particle box conner associated with the original box size. 
(default eman1)" ) parser.add_option( "--box_size", type="int", default=256, help= "x and y dimension of square area to be windowed (in pixels): pixel size after resampling is assumed when resample_ratio < 1.0 (default 256)" ) parser.add_option( "--invert", action="store_true", default=False, help="invert image contrast: recommended for cryo data (default False)" ) parser.add_option( "--import_ctf", type="string", default="", help="file name of sxcter output: normally partres.txt (default none)") parser.add_option( "--limit_ctf", action="store_true", default=False, help= "filter micrographs based on the CTF limit: this option requires --import_ctf. (default False)" ) parser.add_option( "--resample_ratio", type="float", default=1.0, help= "ratio of new to old image size (or old to new pixel size) for resampling: Valid range is 0.0 < resample_ratio <= 1.0. (default 1.0)" ) parser.add_option( "--defocus_error", type="float", default=1000000.0, help= "defocus errror limit: exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocus_error percent. the error is computed as (std dev defocus)/defocus*100%. (default 1000000.0)" ) parser.add_option( "--astigmatism_error", type="float", default=360.0, help= "astigmatism error limit: Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatism_error degrees. 
(default 360.0)" ) ### detect if program is running under MPI RUNNING_UNDER_MPI = "OMPI_COMM_WORLD_SIZE" in os.environ main_node = 0 if RUNNING_UNDER_MPI: from mpi import mpi_init from mpi import MPI_COMM_WORLD, mpi_comm_rank, mpi_comm_size, mpi_barrier, mpi_reduce, MPI_INT, MPI_SUM mpi_init(0, []) myid = mpi_comm_rank(MPI_COMM_WORLD) number_of_processes = mpi_comm_size(MPI_COMM_WORLD) else: number_of_processes = 1 myid = 0 (options, args) = parser.parse_args(sys.argv[1:]) mic_list_file_path = None mic_pattern = None coords_pattern = None error_status = None while True: if len(args) < 3 or len(args) > 4: error_status = ( "Please check usage for number of arguments.\n Usage: " + usage + "\n" + "Please run %s -h for help." % (progname), getframeinfo(currentframe())) break if len(args) == 3: mic_pattern = args[0] coords_pattern = args[1] out_dir = args[2] else: # assert(len(args) == 4) mic_list_file_path = args[0] mic_pattern = args[1] coords_pattern = args[2] out_dir = args[3] if mic_list_file_path != None: if os.path.splitext(mic_list_file_path)[1] != ".txt": error_status = ( "Extension of input micrograph list file must be \".txt\". Please check input_micrograph_list_file argument. Run %s -h for help." % (progname), getframeinfo(currentframe())) break if mic_pattern[:len("bdb:")].lower() == "bdb": error_status = ( "BDB file can not be selected as input micrographs. Please convert the format, and restart the program. Run %s -h for help." % (progname), getframeinfo(currentframe())) break if mic_pattern.find("*") == -1: error_status = ( "Input micrograph file name pattern must contain wild card (*). Please check input_micrograph_pattern argument. Run %s -h for help." % (progname), getframeinfo(currentframe())) break if coords_pattern.find("*") == -1: error_status = ( "Input coordinates file name pattern must contain wild card (*). Please check input_coordinates_pattern argument. Run %s -h for help." 
% (progname), getframeinfo(currentframe())) break if myid == main_node: if os.path.exists(out_dir): error_status = ( "Output directory exists. Please change the name and restart the program.", getframeinfo(currentframe())) break break if_error_then_all_processes_exit_program(error_status) # Check invalid conditions of options check_options(options, progname) mic_name_list = None error_status = None if myid == main_node: if mic_list_file_path != None: print("Loading micrograph list from %s file ..." % (mic_list_file_path)) mic_name_list = read_text_file(mic_list_file_path) if len(mic_name_list) == 0: print("Directory of first micrograph entry is " % (os.path.dirname(mic_name_list[0]))) else: # assert (mic_list_file_path == None) print("Generating micrograph list in %s directory..." % (os.path.dirname(mic_pattern))) mic_name_list = glob.glob(mic_pattern) if len(mic_name_list) == 0: error_status = ( "No micrograph file is found. Please check input_micrograph_pattern and/or input_micrograph_list_file argument. Run %s -h for help." % (progname), getframeinfo(currentframe())) else: print("Found %d microgarphs" % len(mic_name_list)) if_error_then_all_processes_exit_program(error_status) if RUNNING_UNDER_MPI: mic_name_list = wrap_mpi_bcast(mic_name_list, main_node) coords_name_list = None error_status = None if myid == main_node: coords_name_list = glob.glob(coords_pattern) if len(coords_name_list) == 0: error_status = ( "No coordinates file is found. Please check input_coordinates_pattern argument. Run %s -h for help." 
% (progname), getframeinfo(currentframe())) if_error_then_all_processes_exit_program(error_status) if RUNNING_UNDER_MPI: coords_name_list = wrap_mpi_bcast(coords_name_list, main_node) ################################################################################################################################################################################################################## ################################################################################################################################################################################################################## ################################################################################################################################################################################################################## # all processes must have access to indices if options.import_ctf: i_enum = -1 i_enum += 1 idx_cter_def = i_enum # defocus [um]; index must be same as ctf object format i_enum += 1 idx_cter_cs = i_enum # Cs [mm]; index must be same as ctf object format i_enum += 1 idx_cter_vol = i_enum # voltage[kV]; index must be same as ctf object format i_enum += 1 idx_cter_apix = i_enum # pixel size [A]; index must be same as ctf object format i_enum += 1 idx_cter_bfactor = i_enum # B-factor [A^2]; index must be same as ctf object format i_enum += 1 idx_cter_ac = i_enum # amplitude contrast [%]; index must be same as ctf object format i_enum += 1 idx_cter_astig_amp = i_enum # astigmatism amplitude [um]; index must be same as ctf object format i_enum += 1 idx_cter_astig_ang = i_enum # astigmatism angle [degree]; index must be same as ctf object format i_enum += 1 idx_cter_sd_def = i_enum # std dev of defocus [um] i_enum += 1 idx_cter_sd_astig_amp = i_enum # std dev of ast amp [A] i_enum += 1 idx_cter_sd_astig_ang = i_enum # std dev of ast angle [degree] i_enum += 1 idx_cter_cv_def = i_enum # coefficient of variation of defocus [%] i_enum += 1 idx_cter_cv_astig_amp = i_enum # 
coefficient of variation of ast amp [%] i_enum += 1 idx_cter_spectra_diff = i_enum # average of differences between with- and without-astig. experimental 1D spectra at extrema i_enum += 1 idx_cter_error_def = i_enum # frequency at which signal drops by 50% due to estimated error of defocus alone [1/A] i_enum += 1 idx_cter_error_astig = i_enum # frequency at which signal drops by 50% due to estimated error of defocus and astigmatism [1/A] i_enum += 1 idx_cter_error_ctf = i_enum # limit frequency by CTF error [1/A] i_enum += 1 idx_cter_mic_name = i_enum # micrograph name i_enum += 1 n_idx_cter = i_enum # Prepare loop variables mic_basename_pattern = os.path.basename( mic_pattern) # file pattern without path mic_baseroot_pattern = os.path.splitext(mic_basename_pattern)[ 0] # file pattern without path and extension coords_format = options.coordinates_format.lower() box_size = options.box_size box_half = box_size // 2 mask2d = model_circle( box_size // 2, box_size, box_size ) # Create circular 2D mask to Util.infomask of particle images resample_ratio = options.resample_ratio n_mic_process = 0 n_mic_reject_no_coords = 0 n_mic_reject_no_cter_entry = 0 n_global_coords_detect = 0 n_global_coords_process = 0 n_global_coords_reject_out_of_boundary = 0 serial_id_list = [] error_status = None ## not a real while, an if with the opportunity to use break when errors need to be reported while myid == main_node: # # NOTE: 2016/05/24 Toshio Moriya # Now, ignores the path in mic_pattern and entries of mic_name_list to create serial ID # Only the basename (file name) in micrograph path must be match # # Create list of micrograph serial ID # Break micrograph name pattern into prefix and suffix to find the head index of the micrograph serial id # mic_basename_tokens = mic_basename_pattern.split('*') # assert (len(mic_basename_tokens) == 2) serial_id_head_index = len(mic_basename_tokens[0]) # Loop through micrograph names for mic_name in mic_name_list: # Find the tail index of the 
serial id and extract serial id from the micrograph name mic_basename = os.path.basename(mic_name) serial_id_tail_index = mic_basename.index(mic_basename_tokens[1]) serial_id = mic_basename[serial_id_head_index:serial_id_tail_index] serial_id_list.append(serial_id) # assert (len(serial_id_list) == len(mic_name)) del mic_name_list # Do not need this anymore # Load CTFs if necessary if options.import_ctf: ctf_list = read_text_row(options.import_ctf) # print("Detected CTF entries : %6d ..." % (len(ctf_list))) if len(ctf_list) == 0: error_status = ( "No CTF entry is found in %s. Please check --import_ctf option. Run %s -h for help." % (options.import_ctf, progname), getframeinfo(currentframe())) break if (len(ctf_list[0]) != n_idx_cter): error_status = ( "Number of columns (%d) must be %d in %s. The format might be old. Please run sxcter.py again." % (len(ctf_list[0]), n_idx_cter, options.import_ctf), getframeinfo(currentframe())) break ctf_dict = {} n_reject_defocus_error = 0 ctf_error_limit = [ options.defocus_error / 100.0, options.astigmatism_error ] for ctf_params in ctf_list: assert (len(ctf_params) == n_idx_cter) # mic_baseroot is name of micrograph minus the path and extension mic_baseroot = os.path.splitext( os.path.basename(ctf_params[idx_cter_mic_name]))[0] if (ctf_params[idx_cter_sd_def] / ctf_params[idx_cter_def] > ctf_error_limit[0]): print( "Defocus error %f exceeds the threshold. Micrograph %s is rejected." % (ctf_params[idx_cter_sd_def] / ctf_params[idx_cter_def], mic_baseroot)) n_reject_defocus_error += 1 else: if (ctf_params[idx_cter_sd_astig_ang] > ctf_error_limit[1]): ctf_params[idx_cter_astig_amp] = 0.0 ctf_params[idx_cter_astig_ang] = 0.0 ctf_dict[mic_baseroot] = ctf_params del ctf_list # Do not need this anymore break if_error_then_all_processes_exit_program(error_status) if options.import_ctf: if options.limit_ctf: cutoff_histogram = [ ] #@ming compute the histogram for micrographs cut of by ctf_params limit. 
################################################################################################################################################################################################################## ################################################################################################################################################################################################################## ################################################################################################################################################################################################################## restricted_serial_id_list = [] if myid == main_node: # Loop over serial IDs of micrographs for serial_id in serial_id_list: # mic_baseroot is name of micrograph minus the path and extension mic_baseroot = mic_baseroot_pattern.replace("*", serial_id) mic_name = mic_pattern.replace("*", serial_id) coords_name = coords_pattern.replace("*", serial_id) ########### # CHECKS: BEGIN if coords_name not in coords_name_list: print(" Cannot read %s. Skipping %s ..." % (coords_name, mic_baseroot)) n_mic_reject_no_coords += 1 continue # IF mic is in CTER results if options.import_ctf: if mic_baseroot not in ctf_dict: print( " Is not listed in CTER results. Skipping %s ..." 
% (mic_baseroot)) n_mic_reject_no_cter_entry += 1 continue else: ctf_params = ctf_dict[mic_baseroot] # CHECKS: END n_mic_process += 1 restricted_serial_id_list.append(serial_id) # restricted_serial_id_list = restricted_serial_id_list[:128] ## for testing against the nonMPI version if myid != main_node: if options.import_ctf: ctf_dict = None error_status = None if len(restricted_serial_id_list) < number_of_processes: error_status = ( 'Number of processes (%d) supplied by --np in mpirun cannot be greater than %d (number of micrographs that satisfy all criteria to be processed) ' % (number_of_processes, len(restricted_serial_id_list)), getframeinfo(currentframe())) if_error_then_all_processes_exit_program(error_status) ## keep a copy of the original output directory where the final bdb will be created original_out_dir = out_dir if RUNNING_UNDER_MPI: mpi_barrier(MPI_COMM_WORLD) restricted_serial_id_list = wrap_mpi_bcast(restricted_serial_id_list, main_node) mic_start, mic_end = MPI_start_end(len(restricted_serial_id_list), number_of_processes, myid) restricted_serial_id_list_not_sliced = restricted_serial_id_list restricted_serial_id_list = restricted_serial_id_list[ mic_start:mic_end] if options.import_ctf: ctf_dict = wrap_mpi_bcast(ctf_dict, main_node) # generate subdirectories of out_dir, one for each process out_dir = os.path.join(out_dir, "%03d" % myid) if myid == main_node: print( "Micrographs processed by main process (including percent complete):" ) len_processed_by_main_node_divided_by_100 = len( restricted_serial_id_list) / 100.0 ################################################################################################################################################################################################################## ################################################################################################################################################################################################################## 
################################################################################################################################################################################################################## ##### Starting main parallel execution for my_idx, serial_id in enumerate(restricted_serial_id_list): mic_baseroot = mic_baseroot_pattern.replace("*", serial_id) mic_name = mic_pattern.replace("*", serial_id) coords_name = coords_pattern.replace("*", serial_id) if myid == main_node: print( mic_name, " ---> % 2.2f%%" % (my_idx / len_processed_by_main_node_divided_by_100)) mic_img = get_im(mic_name) # Read coordinates according to the specified format and # make the coordinates the center of particle image if coords_format == "sparx": coords_list = read_text_row(coords_name) elif coords_format == "eman1": coords_list = read_text_row(coords_name) for i in xrange(len(coords_list)): coords_list[i] = [(coords_list[i][0] + coords_list[i][2] // 2), (coords_list[i][1] + coords_list[i][3] // 2)] elif coords_format == "eman2": coords_list = js_open_dict(coords_name)["boxes"] for i in xrange(len(coords_list)): coords_list[i] = [coords_list[i][0], coords_list[i][1]] elif coords_format == "spider": coords_list = read_text_row(coords_name) for i in xrange(len(coords_list)): coords_list[i] = [coords_list[i][2], coords_list[i][3]] # else: assert (False) # Unreachable code # Calculate the new pixel size if options.import_ctf: ctf_params = ctf_dict[mic_baseroot] pixel_size_origin = ctf_params[idx_cter_apix] if resample_ratio < 1.0: # assert (resample_ratio > 0.0) new_pixel_size = pixel_size_origin / resample_ratio print( "Resample micrograph to pixel size %6.4f and window segments from resampled micrograph." 
% new_pixel_size) else: # assert (resample_ratio == 1.0) new_pixel_size = pixel_size_origin # Set ctf along with new pixel size in resampled micrograph ctf_params[idx_cter_apix] = new_pixel_size else: # assert (not options.import_ctf) if resample_ratio < 1.0: # assert (resample_ratio > 0.0) print( "Resample micrograph with ratio %6.4f and window segments from resampled micrograph." % resample_ratio) # else: # assert (resample_ratio == 1.0) # Apply filters to micrograph fftip(mic_img) if options.limit_ctf: # assert (options.import_ctf) # Cut off frequency components higher than CTF limit q1, q2 = ctflimit(box_size, ctf_params[idx_cter_def], ctf_params[idx_cter_cs], ctf_params[idx_cter_vol], new_pixel_size) # This is absolute frequency of CTF limit in scale of original micrograph if resample_ratio < 1.0: # assert (resample_ratio > 0.0) q1 = resample_ratio * q1 / float( box_size ) # q1 = (pixel_size_origin / new_pixel_size) * q1/float(box_size) else: # assert (resample_ratio == 1.0) -> pixel_size_origin == new_pixel_size -> pixel_size_origin / new_pixel_size == 1.0 q1 = q1 / float(box_size) if q1 < 0.5: mic_img = filt_tanl(mic_img, q1, 0.01) cutoff_histogram.append(q1) # Cut off frequency components lower than the box size can express mic_img = fft(filt_gaussh(mic_img, resample_ratio / box_size)) # Resample micrograph, map coordinates, and window segments from resampled micrograph using new coordinates # after resampling by resample_ratio, new pixel size will be pixel_size/resample_ratio = new_pixel_size # NOTE: 2015/04/13 Toshio Moriya # resample() efficiently takes care of the case resample_ratio = 1.0 but # it does not set apix_*. Even though it sets apix_* when resample_ratio < 1.0 ... 
mic_img = resample(mic_img, resample_ratio) if options.invert: mic_stats = Util.infomask( mic_img, None, True) # mic_stat[0:mean, 1:SD, 2:min, 3:max] Util.mul_scalar(mic_img, -1.0) mic_img += 2 * mic_stats[0] if options.import_ctf: from utilities import generate_ctf ctf_obj = generate_ctf( ctf_params ) # indexes 0 to 7 (idx_cter_def to idx_cter_astig_ang) must be same in cter format & ctf object format. # Prepare loop variables nx = mic_img.get_xsize() ny = mic_img.get_ysize() x0 = nx // 2 y0 = ny // 2 n_coords_reject_out_of_boundary = 0 local_stack_name = "bdb:%s#" % out_dir + mic_baseroot + '_ptcls' local_particle_id = 0 # can be different from coordinates_id # Loop over coordinates for coords_id in xrange(len(coords_list)): x = int(coords_list[coords_id][0]) y = int(coords_list[coords_id][1]) if resample_ratio < 1.0: # assert (resample_ratio > 0.0) x = int(x * resample_ratio) y = int(y * resample_ratio) # else: # assert(resample_ratio == 1.0) if ((0 <= x - box_half) and (x + box_half <= nx) and (0 <= y - box_half) and (y + box_half <= ny)): particle_img = Util.window(mic_img, box_size, box_size, 1, x - x0, y - y0) else: print( "In %s, coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ..." % (mic_baseroot, coords_id, x, y, box_size)) n_coords_reject_out_of_boundary += 1 continue particle_img = ramp(particle_img) particle_stats = Util.infomask( particle_img, mask2d, False) # particle_stats[0:mean, 1:SD, 2:min, 3:max] particle_img -= particle_stats[0] particle_img /= particle_stats[1] # NOTE: 2015/04/09 Toshio Moriya # ptcl_source_image might be redundant information ... # Consider re-organizing header entries... 
particle_img.set_attr("ptcl_source_image", mic_name) particle_img.set_attr("ptcl_source_coord_id", coords_id) particle_img.set_attr("ptcl_source_coord", [ int(coords_list[coords_id][0]), int(coords_list[coords_id][1]) ]) particle_img.set_attr("resample_ratio", resample_ratio) # NOTE: 2015/04/13 Toshio Moriya # apix_* attributes are updated by resample() only when resample_ratio != 1.0 # Let's make sure header info is consistent by setting apix_* = 1.0 # regardless of options, so it is not passed down the processing line particle_img.set_attr("apix_x", 1.0) particle_img.set_attr("apix_y", 1.0) particle_img.set_attr("apix_z", 1.0) if options.import_ctf: particle_img.set_attr("ctf", ctf_obj) particle_img.set_attr("ctf_applied", 0) particle_img.set_attr("pixel_size_origin", pixel_size_origin) # particle_img.set_attr("apix_x", new_pixel_size) # particle_img.set_attr("apix_y", new_pixel_size) # particle_img.set_attr("apix_z", new_pixel_size) # NOTE: 2015/04/13 Toshio Moriya # Pawel Comment: Micrograph is not supposed to have CTF header info. # So, let's assume it does not exist & ignore its presence. # Note that resample() "correctly" updates pixel size of CTF header info if it exists # elif (particle_img.has_ctff()): # assert(not options.import_ctf) # ctf_origin = particle_img.get_attr("ctf_obj") # pixel_size_origin = round(ctf_origin.apix, 5) # Because SXCTER ouputs up to 5 digits # particle_img.set_attr("apix_x",pixel_size_origin) # particle_img.set_attr("apix_y",pixel_size_origin) # particle_img.set_attr("apix_z",pixel_size_origin) # print("local_stack_name, local_particle_id", local_stack_name, local_particle_id) particle_img.write_image(local_stack_name, local_particle_id) local_particle_id += 1 n_global_coords_detect += len(coords_list) n_global_coords_process += local_particle_id n_global_coords_reject_out_of_boundary += n_coords_reject_out_of_boundary # # MRK_DEBUG: Toshio Moriya 2016/05/03 # # Following codes are for debugging bdb. 
Delete in future # result = db_check_dict(local_stack_name) # print('# MRK_DEBUG: result = db_check_dict(local_stack_name): %s' % (result)) # result = db_list_dicts('bdb:%s' % out_dir) # print('# MRK_DEBUG: result = db_list_dicts(out_dir): %s' % (result)) # result = db_get_image_info(local_stack_name) # print('# MRK_DEBUG: result = db_get_image_info(local_stack_name)', result) # Release the data base of local stack from this process # so that the subprocess can access to the data base db_close_dict(local_stack_name) # # MRK_DEBUG: Toshio Moriya 2016/05/03 # # Following codes are for debugging bdb. Delete in future # cmd_line = "e2iminfo.py %s" % (local_stack_name) # print('# MRK_DEBUG: Executing the command: %s' % (cmd_line)) # cmdexecute(cmd_line) # # MRK_DEBUG: Toshio Moriya 2016/05/03 # # Following codes are for debugging bdb. Delete in future # cmd_line = "e2iminfo.py bdb:%s#data" % (out_dir) # print('# MRK_DEBUG: Executing the command: %s' % (cmd_line)) # cmdexecute(cmd_line) if RUNNING_UNDER_MPI: if options.import_ctf: if options.limit_ctf: cutoff_histogram = wrap_mpi_gatherv(cutoff_histogram, main_node) if myid == main_node: if options.limit_ctf: # Print out the summary of CTF-limit filtering print(" ") print("Global summary of CTF-limit filtering (--limit_ctf) ...") print("Percentage of filtered micrographs: %8.2f\n" % (len(cutoff_histogram) * 100.0 / len(restricted_serial_id_list_not_sliced))) n_bins = 10 if len(cutoff_histogram) >= n_bins: from statistics import hist_list cutoff_region, cutoff_counts = hist_list( cutoff_histogram, n_bins) print(" Histogram of cut-off frequency") print(" cut-off counts") for bin_id in xrange(n_bins): print(" %14.7f %7d" % (cutoff_region[bin_id], cutoff_counts[bin_id])) else: print( "The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced." 
% (len(cutoff_histogram), n_bins)) n_mic_process = mpi_reduce(n_mic_process, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD) n_mic_reject_no_coords = mpi_reduce(n_mic_reject_no_coords, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD) n_mic_reject_no_cter_entry = mpi_reduce(n_mic_reject_no_cter_entry, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD) n_global_coords_detect = mpi_reduce(n_global_coords_detect, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD) n_global_coords_process = mpi_reduce(n_global_coords_process, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD) n_global_coords_reject_out_of_boundary = mpi_reduce( n_global_coords_reject_out_of_boundary, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD) # Print out the summary of all micrographs if main_node == myid: print(" ") print("Global summary of micrographs ...") print("Detected : %6d" % (len(restricted_serial_id_list_not_sliced))) print("Processed : %6d" % (n_mic_process)) print("Rejected by no coordinates file : %6d" % (n_mic_reject_no_coords)) print("Rejected by no CTER entry : %6d" % (n_mic_reject_no_cter_entry)) print(" ") print("Global summary of coordinates ...") print("Detected : %6d" % (n_global_coords_detect)) print("Processed : %6d" % (n_global_coords_process)) print("Rejected by out of boundary : %6d" % (n_global_coords_reject_out_of_boundary)) # print(" ") # print("DONE!!!") mpi_barrier(MPI_COMM_WORLD) if main_node == myid: import time time.sleep(1) print("\n Creating bdb:%s/data\n" % original_out_dir) for proc_i in range(number_of_processes): mic_start, mic_end = MPI_start_end( len(restricted_serial_id_list_not_sliced), number_of_processes, proc_i) for serial_id in restricted_serial_id_list_not_sliced[ mic_start:mic_end]: e2bdb_command = "e2bdb.py " mic_baseroot = mic_baseroot_pattern.replace("*", serial_id) if RUNNING_UNDER_MPI: e2bdb_command += "bdb:" + os.path.join( original_out_dir, "%03d/" % proc_i) + mic_baseroot + "_ptcls " else: e2bdb_command += "bdb:" + os.path.join( original_out_dir, 
mic_baseroot + "_ptcls ") e2bdb_command += " --appendvstack=bdb:%s/data 1>/dev/null" % original_out_dir cmdexecute(e2bdb_command, printing_on_success=False) print("Done!\n") if RUNNING_UNDER_MPI: mpi_barrier(MPI_COMM_WORLD) from mpi import mpi_finalize mpi_finalize() sys.stdout.flush() sys.exit(0)
def ali3d_MPI(stack, ref_vol, outdir, maskfile=None, ir=1, ou=-1, rs=1, xr="4 2 2 1", yr="-1", ts="1 1 0.5 0.25", delta="10 6 4 4", an="-1", center=0, maxit=5, term=95, CTF=False, fourvar=False, snr=1.0, ref_a="S", sym="c1", sort=True, cutoff=999.99, pix_cutoff="0", two_tail=False, model_jump="1 1 1 1 1", restart=False, save_half=False, protos=None, oplane=None, lmask=-1, ilmask=-1, findseam=False, vertstep=None, hpars="-1", hsearch="0.0 50.0", full_output=False, compare_repro=False, compare_ref_free="-1", ref_free_cutoff="-1 -1 -1 -1", wcmask=None, debug=False, recon_pad=4, olmask=75): from alignment import Numrinit, prepare_refrings from utilities import model_circle, get_image, drop_image, get_input_from_string from utilities import bcast_list_to_all, bcast_number_to_all, reduce_EMData_to_root, bcast_EMData_to_all from utilities import send_attr_dict from utilities import get_params_proj, file_type from fundamentals import rot_avg_image import os import types from utilities import print_begin_msg, print_end_msg, print_msg from mpi import mpi_bcast, mpi_comm_size, mpi_comm_rank, MPI_FLOAT, MPI_COMM_WORLD, mpi_barrier, mpi_reduce from mpi import mpi_reduce, MPI_INT, MPI_SUM, mpi_finalize from filter import filt_ctf from projection import prep_vol, prgs from statistics import hist_list, varf3d_MPI, fsc_mask from numpy import array, bincount, array2string, ones number_of_proc = mpi_comm_size(MPI_COMM_WORLD) myid = mpi_comm_rank(MPI_COMM_WORLD) main_node = 0 if myid == main_node: if os.path.exists(outdir): ERROR( 'Output directory exists, please change the name and restart the program', "ali3d_MPI", 1) os.mkdir(outdir) mpi_barrier(MPI_COMM_WORLD) if debug: from time import sleep while not os.path.exists(outdir): print "Node ", myid, " waiting..." 
sleep(5) info_file = os.path.join(outdir, "progress%04d" % myid) finfo = open(info_file, 'w') else: finfo = None mjump = get_input_from_string(model_jump) xrng = get_input_from_string(xr) if yr == "-1": yrng = xrng else: yrng = get_input_from_string(yr) step = get_input_from_string(ts) delta = get_input_from_string(delta) ref_free_cutoff = get_input_from_string(ref_free_cutoff) pix_cutoff = get_input_from_string(pix_cutoff) lstp = min(len(xrng), len(yrng), len(step), len(delta)) if an == "-1": an = [-1] * lstp else: an = get_input_from_string(an) # make sure pix_cutoff is set for all iterations if len(pix_cutoff) < lstp: for i in xrange(len(pix_cutoff), lstp): pix_cutoff.append(pix_cutoff[-1]) # don't waste time on sub-pixel alignment for low-resolution ang incr for i in range(len(step)): if (delta[i] > 4 or delta[i] == -1) and step[i] < 1: step[i] = 1 first_ring = int(ir) rstep = int(rs) last_ring = int(ou) max_iter = int(maxit) center = int(center) nrefs = EMUtil.get_image_count(ref_vol) nmasks = 0 if maskfile: # read number of masks within each maskfile (mc) nmasks = EMUtil.get_image_count(maskfile) # open masks within maskfile (mc) maskF = EMData.read_images(maskfile, xrange(nmasks)) vol = EMData.read_images(ref_vol, xrange(nrefs)) nx = vol[0].get_xsize() ## make sure box sizes are the same if myid == main_node: im = EMData.read_images(stack, [0]) bx = im[0].get_xsize() if bx != nx: print_msg( "Error: Stack box size (%i) differs from initial model (%i)\n" % (bx, nx)) sys.exit() del im, bx # for helical processing: helicalrecon = False if protos is not None or hpars != "-1" or findseam is True: helicalrecon = True # if no out-of-plane param set, use 5 degrees if oplane is None: oplane = 5.0 if protos is not None: proto = get_input_from_string(protos) if len(proto) != nrefs: print_msg("Error: insufficient protofilament numbers supplied") sys.exit() if hpars != "-1": hpars = get_input_from_string(hpars) if len(hpars) != 2 * nrefs: print_msg("Error: insufficient 
helical parameters supplied") sys.exit() ## create helical parameter file for helical reconstruction if helicalrecon is True and myid == main_node: from hfunctions import createHpar # create initial helical parameter files dp = [0] * nrefs dphi = [0] * nrefs vdp = [0] * nrefs vdphi = [0] * nrefs for iref in xrange(nrefs): hpar = os.path.join(outdir, "hpar%02d.spi" % (iref)) params = False if hpars != "-1": # if helical parameters explicitly given, set twist & rise params = [float(hpars[iref * 2]), float(hpars[(iref * 2) + 1])] dp[iref], dphi[iref], vdp[iref], vdphi[iref] = createHpar( hpar, proto[iref], params, vertstep) # get values for helical search parameters hsearch = get_input_from_string(hsearch) if len(hsearch) != 2: print_msg("Error: specify outer and inner radii for helical search") sys.exit() if last_ring < 0 or last_ring > int(nx / 2) - 2: last_ring = int(nx / 2) - 2 if myid == main_node: # import user_functions # user_func = user_functions.factory[user_func_name] print_begin_msg("ali3d_MPI") print_msg("Input stack : %s\n" % (stack)) print_msg("Reference volume : %s\n" % (ref_vol)) print_msg("Output directory : %s\n" % (outdir)) if nmasks > 0: print_msg("Maskfile (number of masks) : %s (%i)\n" % (maskfile, nmasks)) print_msg("Inner radius : %i\n" % (first_ring)) print_msg("Outer radius : %i\n" % (last_ring)) print_msg("Ring step : %i\n" % (rstep)) print_msg("X search range : %s\n" % (xrng)) print_msg("Y search range : %s\n" % (yrng)) print_msg("Translational step : %s\n" % (step)) print_msg("Angular step : %s\n" % (delta)) print_msg("Angular search range : %s\n" % (an)) print_msg("Maximum iteration : %i\n" % (max_iter)) print_msg("Center type : %i\n" % (center)) print_msg("CTF correction : %s\n" % (CTF)) print_msg("Signal-to-Noise Ratio : %f\n" % (snr)) print_msg("Reference projection method : %s\n" % (ref_a)) print_msg("Symmetry group : %s\n" % (sym)) print_msg("Fourier padding for 3D : %i\n" % (recon_pad)) print_msg("Number of reference models : %i\n" 
% (nrefs)) print_msg("Sort images between models : %s\n" % (sort)) print_msg("Allow images to jump : %s\n" % (mjump)) print_msg("CC cutoff standard dev : %f\n" % (cutoff)) print_msg("Two tail cutoff : %s\n" % (two_tail)) print_msg("Termination pix error : %f\n" % (term)) print_msg("Pixel error cutoff : %s\n" % (pix_cutoff)) print_msg("Restart : %s\n" % (restart)) print_msg("Full output : %s\n" % (full_output)) print_msg("Compare reprojections : %s\n" % (compare_repro)) print_msg("Compare ref free class avgs : %s\n" % (compare_ref_free)) print_msg("Use cutoff from ref free : %s\n" % (ref_free_cutoff)) if protos: print_msg("Protofilament numbers : %s\n" % (proto)) print_msg("Using helical search range : %s\n" % hsearch) if findseam is True: print_msg("Using seam-based reconstruction\n") if hpars != "-1": print_msg("Using hpars : %s\n" % hpars) if vertstep != None: print_msg("Using vertical step : %.2f\n" % vertstep) if save_half is True: print_msg("Saving even/odd halves\n") for i in xrange(100): print_msg("*") print_msg("\n\n") if maskfile: if type(maskfile) is types.StringType: mask3D = get_image(maskfile) else: mask3D = maskfile else: mask3D = model_circle(last_ring, nx, nx, nx) numr = Numrinit(first_ring, last_ring, rstep, "F") mask2D = model_circle(last_ring, nx, nx) - model_circle(first_ring, nx, nx) fscmask = model_circle(last_ring, nx, nx, nx) if CTF: from filter import filt_ctf from reconstruction_rjh import rec3D_MPI_noCTF if myid == main_node: active = EMUtil.get_all_attributes(stack, 'active') list_of_particles = [] for im in xrange(len(active)): if active[im]: list_of_particles.append(im) del active nima = len(list_of_particles) else: nima = 0 total_nima = bcast_number_to_all(nima, source_node=main_node) if myid != main_node: list_of_particles = [-1] * total_nima list_of_particles = bcast_list_to_all(list_of_particles, source_node=main_node) image_start, image_end = MPI_start_end(total_nima, number_of_proc, myid) # create a list of images for each node 
list_of_particles = list_of_particles[image_start:image_end] nima = len(list_of_particles) if debug: finfo.write("image_start, image_end: %d %d\n" % (image_start, image_end)) finfo.flush() data = EMData.read_images(stack, list_of_particles) t_zero = Transform({ "type": "spider", "phi": 0, "theta": 0, "psi": 0, "tx": 0, "ty": 0 }) transmulti = [[t_zero for i in xrange(nrefs)] for j in xrange(nima)] for iref, im in ((iref, im) for iref in xrange(nrefs) for im in xrange(nima)): if nrefs == 1: transmulti[im][iref] = data[im].get_attr("xform.projection") else: # if multi models, keep track of eulers for all models try: transmulti[im][iref] = data[im].get_attr("eulers_txty.%i" % iref) except: data[im].set_attr("eulers_txty.%i" % iref, t_zero) scoremulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)] pixelmulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)] ref_res = [0.0 for x in xrange(nrefs)] apix = data[0].get_attr('apix_x') # for oplane parameter, create cylindrical mask if oplane is not None and myid == main_node: from hfunctions import createCylMask cmaskf = os.path.join(outdir, "mask3D_cyl.mrc") mask3D = createCylMask(data, olmask, lmask, ilmask, cmaskf) # if finding seam of helix, create wedge masks if findseam is True: wedgemask = [] for pf in xrange(nrefs): wedgemask.append(EMData()) # wedgemask option if wcmask is not None: wcmask = get_input_from_string(wcmask) if len(wcmask) != 3: print_msg( "Error: wcmask option requires 3 values: x y radius") sys.exit() # determine if particles have helix info: try: data[0].get_attr('h_angle') original_data = [] boxmask = True from hfunctions import createBoxMask except: boxmask = False # prepare particles for im in xrange(nima): data[im].set_attr('ID', list_of_particles[im]) data[im].set_attr('pix_score', int(0)) if CTF: # only phaseflip particles, not full CTF correction ctf_params = data[im].get_attr("ctf") st = Util.infomask(data[im], mask2D, False) data[im] -= st[0] data[im] = filt_ctf(data[im], 
ctf_params, sign=-1, binary=1) data[im].set_attr('ctf_applied', 1) # for window mask: if boxmask is True: h_angle = data[im].get_attr("h_angle") original_data.append(data[im].copy()) bmask = createBoxMask(nx, apix, ou, lmask, h_angle) data[im] *= bmask del bmask if debug: finfo.write('%d loaded \n' % nima) finfo.flush() if myid == main_node: # initialize data for the reference preparation function ref_data = [mask3D, max(center, 0), None, None, None, None] # for method -1, switch off centering in user function from time import time # this is needed for gathering of pixel errors disps = [] recvcount = [] disps_score = [] recvcount_score = [] for im in xrange(number_of_proc): if (im == main_node): disps.append(0) disps_score.append(0) else: disps.append(disps[im - 1] + recvcount[im - 1]) disps_score.append(disps_score[im - 1] + recvcount_score[im - 1]) ib, ie = MPI_start_end(total_nima, number_of_proc, im) recvcount.append(ie - ib) recvcount_score.append((ie - ib) * nrefs) pixer = [0.0] * nima cs = [0.0] * 3 total_iter = 0 volodd = EMData.read_images(ref_vol, xrange(nrefs)) voleve = EMData.read_images(ref_vol, xrange(nrefs)) if restart: # recreate initial volumes from alignments stored in header itout = "000_00" for iref in xrange(nrefs): if (nrefs == 1): modout = "" else: modout = "_model_%02d" % (iref) if (sort): group = iref for im in xrange(nima): imgroup = data[im].get_attr('group') if imgroup == iref: data[im].set_attr('xform.projection', transmulti[im][iref]) else: group = int(999) for im in xrange(nima): data[im].set_attr('xform.projection', transmulti[im][iref]) fscfile = os.path.join(outdir, "fsc_%s%s" % (itout, modout)) vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF( data, sym, fscmask, fscfile, myid, main_node, index=group, npad=recon_pad) if myid == main_node: if helicalrecon: from hfunctions import processHelicalVol vstep = None if vertstep is not None: vstep = (vdp[iref], vdphi[iref]) print_msg( "Old rise and twist for model %i : %8.3f, 
%8.3f\n" % (iref, dp[iref], dphi[iref])) hvals = processHelicalVol(vol[iref], voleve[iref], volodd[iref], iref, outdir, itout, dp[iref], dphi[iref], apix, hsearch, findseam, vstep, wcmask) (vol[iref], voleve[iref], volodd[iref], dp[iref], dphi[iref], vdp[iref], vdphi[iref]) = hvals print_msg( "New rise and twist for model %i : %8.3f, %8.3f\n" % (iref, dp[iref], dphi[iref])) # get new FSC from symmetrized half volumes fscc = fsc_mask(volodd[iref], voleve[iref], mask3D, rstep, fscfile) else: vol[iref].write_image( os.path.join(outdir, "vol_%s.hdf" % itout), -1) if save_half is True: volodd[iref].write_image( os.path.join(outdir, "volodd_%s.hdf" % itout), -1) voleve[iref].write_image( os.path.join(outdir, "voleve_%s.hdf" % itout), -1) if nmasks > 1: # Read mask for multiplying ref_data[0] = maskF[iref] ref_data[2] = vol[iref] ref_data[3] = fscc # call user-supplied function to prepare reference image, i.e., center and filter it vol[iref], cs, fl = ref_ali3d(ref_data) vol[iref].write_image( os.path.join(outdir, "volf_%s.hdf" % (itout)), -1) if (apix == 1): res_msg = "Models filtered at spatial frequency of:\t" res = fl else: res_msg = "Models filtered at resolution of: \t" res = apix / fl ares = array2string(array(res), precision=2) print_msg("%s%s\n\n" % (res_msg, ares)) bcast_EMData_to_all(vol[iref], myid, main_node) # write out headers, under MPI writing has to be done sequentially mpi_barrier(MPI_COMM_WORLD) # projection matching for N_step in xrange(lstp): terminate = 0 Iter = -1 while (Iter < max_iter - 1 and terminate == 0): Iter += 1 total_iter += 1 itout = "%03g_%02d" % (delta[N_step], Iter) if myid == main_node: print_msg( "ITERATION #%3d, inner iteration #%3d\nDelta = %4.1f, an = %5.2f, xrange = %5.2f, yrange = %5.2f, step = %5.2f\n\n" % (N_step, Iter, delta[N_step], an[N_step], xrng[N_step], yrng[N_step], step[N_step])) for iref in xrange(nrefs): if myid == main_node: start_time = time() volft, kb = prep_vol(vol[iref]) ## constrain projections to out of 
plane parameter theta1 = None theta2 = None if oplane is not None: theta1 = 90 - oplane theta2 = 90 + oplane refrings = prepare_refrings(volft, kb, nx, delta[N_step], ref_a, sym, numr, MPI=True, phiEqpsi="Minus", initial_theta=theta1, delta_theta=theta2) del volft, kb if myid == main_node: print_msg( "Time to prepare projections for model %i: %s\n" % (iref, legibleTime(time() - start_time))) start_time = time() for im in xrange(nima): data[im].set_attr("xform.projection", transmulti[im][iref]) if an[N_step] == -1: t1, peak, pixer[im] = proj_ali_incore( data[im], refrings, numr, xrng[N_step], yrng[N_step], step[N_step], finfo) else: t1, peak, pixer[im] = proj_ali_incore_local( data[im], refrings, numr, xrng[N_step], yrng[N_step], step[N_step], an[N_step], finfo) #data[im].set_attr("xform.projection"%iref, t1) if nrefs > 1: data[im].set_attr("eulers_txty.%i" % iref, t1) scoremulti[im][iref] = peak from pixel_error import max_3D_pixel_error # t1 is the current param, t2 is old t2 = transmulti[im][iref] pixelmulti[im][iref] = max_3D_pixel_error(t1, t2, numr[-3]) transmulti[im][iref] = t1 if myid == main_node: print_msg("Time of alignment for model %i: %s\n" % (iref, legibleTime(time() - start_time))) start_time = time() # gather scoring data from all processors from mpi import mpi_gatherv scoremultisend = sum(scoremulti, []) pixelmultisend = sum(pixelmulti, []) tmp = mpi_gatherv(scoremultisend, len(scoremultisend), MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node, MPI_COMM_WORLD) tmp1 = mpi_gatherv(pixelmultisend, len(pixelmultisend), MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node, MPI_COMM_WORLD) tmp = mpi_bcast(tmp, (total_nima * nrefs), MPI_FLOAT, 0, MPI_COMM_WORLD) tmp1 = mpi_bcast(tmp1, (total_nima * nrefs), MPI_FLOAT, 0, MPI_COMM_WORLD) tmp = map(float, tmp) tmp1 = map(float, tmp1) score = array(tmp).reshape(-1, nrefs) pixelerror = array(tmp1).reshape(-1, nrefs) score_local = array(scoremulti) mean_score = score.mean(axis=0) std_score 
= score.std(axis=0) cut = mean_score - (cutoff * std_score) cut2 = mean_score + (cutoff * std_score) res_max = score_local.argmax(axis=1) minus_cc = [0.0 for x in xrange(nrefs)] minus_pix = [0.0 for x in xrange(nrefs)] minus_ref = [0.0 for x in xrange(nrefs)] #output pixel errors if (myid == main_node): from statistics import hist_list lhist = 20 pixmin = pixelerror.min(axis=1) region, histo = hist_list(pixmin, lhist) if (region[0] < 0.0): region[0] = 0.0 print_msg( "Histogram of pixel errors\n ERROR number of particles\n" ) for lhx in xrange(lhist): print_msg(" %10.3f %7d\n" % (region[lhx], histo[lhx])) # Terminate if 95% within 1 pixel error im = 0 for lhx in xrange(lhist): if (region[lhx] > 1.0): break im += histo[lhx] print_msg("Percent of particles with pixel error < 1: %f\n\n" % (im / float(total_nima) * 100)) term_cond = float(term) / 100 if (im / float(total_nima) > term_cond): terminate = 1 print_msg("Terminating internal loop\n") del region, histo terminate = mpi_bcast(terminate, 1, MPI_INT, 0, MPI_COMM_WORLD) terminate = int(terminate[0]) for im in xrange(nima): if (sort == False): data[im].set_attr('group', 999) elif (mjump[N_step] == 1): data[im].set_attr('group', int(res_max[im])) pix_run = data[im].get_attr('pix_score') if (pix_cutoff[N_step] == 1 and (terminate == 1 or Iter == max_iter - 1)): if (pixelmulti[im][int(res_max[im])] > 1): data[im].set_attr('pix_score', int(777)) if (score_local[im][int(res_max[im])] < cut[int( res_max[im])]) or (two_tail and score_local[im][int( res_max[im])] > cut2[int(res_max[im])]): data[im].set_attr('group', int(888)) minus_cc[int(res_max[im])] = minus_cc[int(res_max[im])] + 1 if (pix_run == 777): data[im].set_attr('group', int(777)) minus_pix[int( res_max[im])] = minus_pix[int(res_max[im])] + 1 if (compare_ref_free != "-1") and (ref_free_cutoff[N_step] != -1) and (total_iter > 1): id = data[im].get_attr('ID') if id in rejects: data[im].set_attr('group', int(666)) minus_ref[int( res_max[im])] = 
minus_ref[int(res_max[im])] + 1 minus_cc_tot = mpi_reduce(minus_cc, nrefs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD) minus_pix_tot = mpi_reduce(minus_pix, nrefs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD) minus_ref_tot = mpi_reduce(minus_ref, nrefs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD) if (myid == main_node): if (sort): tot_max = score.argmax(axis=1) res = bincount(tot_max) else: res = ones(nrefs) * total_nima print_msg("Particle distribution: \t\t%s\n" % (res * 1.0)) afcut1 = res - minus_cc_tot afcut2 = afcut1 - minus_pix_tot afcut3 = afcut2 - minus_ref_tot print_msg("Particle distribution after cc cutoff:\t\t%s\n" % (afcut1)) print_msg("Particle distribution after pix cutoff:\t\t%s\n" % (afcut2)) print_msg("Particle distribution after ref cutoff:\t\t%s\n\n" % (afcut3)) res = [0.0 for i in xrange(nrefs)] for iref in xrange(nrefs): if (center == -1): from utilities import estimate_3D_center_MPI, rotate_3D_shift dummy = EMData() cs[0], cs[1], cs[2], dummy, dummy = estimate_3D_center_MPI( data, total_nima, myid, number_of_proc, main_node) cs = mpi_bcast(cs, 3, MPI_FLOAT, main_node, MPI_COMM_WORLD) cs = [-float(cs[0]), -float(cs[1]), -float(cs[2])] rotate_3D_shift(data, cs) if (sort): group = iref for im in xrange(nima): imgroup = data[im].get_attr('group') if imgroup == iref: data[im].set_attr('xform.projection', transmulti[im][iref]) else: group = int(999) for im in xrange(nima): data[im].set_attr('xform.projection', transmulti[im][iref]) if (nrefs == 1): modout = "" else: modout = "_model_%02d" % (iref) fscfile = os.path.join(outdir, "fsc_%s%s" % (itout, modout)) vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF( data, sym, fscmask, fscfile, myid, main_node, index=group, npad=recon_pad) if myid == main_node: print_msg("3D reconstruction time for model %i: %s\n" % (iref, legibleTime(time() - start_time))) start_time = time() # Compute Fourier variance if fourvar: outvar = os.path.join(outdir, "volVar_%s.hdf" % (itout)) ssnr_file = os.path.join(outdir, 
"ssnr_%s" % (itout)) varf = varf3d_MPI(data, ssnr_text_file=ssnr_file, mask2D=None, reference_structure=vol[iref], ou=last_ring, rw=1.0, npad=1, CTF=None, sign=1, sym=sym, myid=myid) if myid == main_node: print_msg( "Time to calculate 3D Fourier variance for model %i: %s\n" % (iref, legibleTime(time() - start_time))) start_time = time() varf = 1.0 / varf varf.write_image(outvar, -1) else: varf = None if myid == main_node: if helicalrecon: from hfunctions import processHelicalVol vstep = None if vertstep is not None: vstep = (vdp[iref], vdphi[iref]) print_msg( "Old rise and twist for model %i : %8.3f, %8.3f\n" % (iref, dp[iref], dphi[iref])) hvals = processHelicalVol(vol[iref], voleve[iref], volodd[iref], iref, outdir, itout, dp[iref], dphi[iref], apix, hsearch, findseam, vstep, wcmask) (vol[iref], voleve[iref], volodd[iref], dp[iref], dphi[iref], vdp[iref], vdphi[iref]) = hvals print_msg( "New rise and twist for model %i : %8.3f, %8.3f\n" % (iref, dp[iref], dphi[iref])) # get new FSC from symmetrized half volumes fscc = fsc_mask(volodd[iref], voleve[iref], mask3D, rstep, fscfile) print_msg( "Time to search and apply helical symmetry for model %i: %s\n\n" % (iref, legibleTime(time() - start_time))) start_time = time() else: vol[iref].write_image( os.path.join(outdir, "vol_%s.hdf" % (itout)), -1) if save_half is True: volodd[iref].write_image( os.path.join(outdir, "volodd_%s.hdf" % (itout)), -1) voleve[iref].write_image( os.path.join(outdir, "voleve_%s.hdf" % (itout)), -1) if nmasks > 1: # Read mask for multiplying ref_data[0] = maskF[iref] ref_data[2] = vol[iref] ref_data[3] = fscc ref_data[4] = varf # call user-supplied function to prepare reference image, i.e., center and filter it vol[iref], cs, fl = ref_ali3d(ref_data) vol[iref].write_image( os.path.join(outdir, "volf_%s.hdf" % (itout)), -1) if (apix == 1): res_msg = "Models filtered at spatial frequency of:\t" res[iref] = fl else: res_msg = "Models filtered at resolution of: \t" res[iref] = apix / fl del varf 
bcast_EMData_to_all(vol[iref], myid, main_node) if compare_ref_free != "-1": compare_repro = True if compare_repro: outfile_repro = comp_rep(refrings, data, itout, modout, vol[iref], group, nima, nx, myid, main_node, outdir) mpi_barrier(MPI_COMM_WORLD) if compare_ref_free != "-1": ref_free_output = os.path.join( outdir, "ref_free_%s%s" % (itout, modout)) rejects = compare(compare_ref_free, outfile_repro, ref_free_output, yrng[N_step], xrng[N_step], rstep, nx, apix, ref_free_cutoff[N_step], number_of_proc, myid, main_node) # retrieve alignment params from all processors par_str = ['xform.projection', 'ID', 'group'] if nrefs > 1: for iref in xrange(nrefs): par_str.append('eulers_txty.%i' % iref) if myid == main_node: from utilities import recv_attr_dict recv_attr_dict(main_node, stack, data, par_str, image_start, image_end, number_of_proc) else: send_attr_dict(main_node, data, par_str, image_start, image_end) if myid == main_node: ares = array2string(array(res), precision=2) print_msg("%s%s\n\n" % (res_msg, ares)) dummy = EMData() if full_output: nimat = EMUtil.get_image_count(stack) output_file = os.path.join(outdir, "paramout_%s" % itout) foutput = open(output_file, 'w') for im in xrange(nimat): # save the parameters for each of the models outstring = "" dummy.read_image(stack, im, True) param3d = dummy.get_attr('xform.projection') g = dummy.get_attr("group") # retrieve alignments in EMAN-format pE = param3d.get_params('eman') outstring += "%f\t%f\t%f\t%f\t%f\t%i\n" % ( pE["az"], pE["alt"], pE["phi"], pE["tx"], pE["ty"], g) foutput.write(outstring) foutput.close() del dummy mpi_barrier(MPI_COMM_WORLD) # mpi_finalize() if myid == main_node: print_end_msg("ali3d_MPI")
def main(args):
    """Window particle images out of micrographs at given coordinates.

    The command line in *args* is parsed with ``OptionParser``; the remaining
    positional arguments are handed to ``build_micnames`` to obtain the list
    of micrograph base names.  For every micrograph that has both an image
    file and a coordinates file (and, when a CTF file is imported, an entry
    in that file) the function:

    1. reads the particle coordinates and converts them to box centers
       according to ``--coords_format``;
    2. optionally removes frequencies above the CTF limit (``--limitctf``),
       then removes frequencies lower than the box size can express;
    3. resamples the micrograph by ``--resample_ratio`` and optionally
       inverts its contrast (``--invert``);
    4. windows a ``box_size`` x ``box_size`` image around every coordinate
       that lies fully inside the micrograph, applies ``ramp`` and a
       normalization under a circular mask, sets the header attributes and
       appends the image to the ``bdb:<outdir>/<basename><outsuffix>`` stack.

    Per-micrograph and global summaries are reported through ``print_msg``.

    Args:
        args: list of command-line arguments passed to
            ``OptionParser.parse_args``.

    Returns:
        The configured ``OptionParser`` when ``--return_options`` is given;
        otherwise ``None``.

    Raises:
        AssertionError: if the coordinate format is not one of ``sparx``,
            ``eman1``, ``eman2`` or ``spider``, or if an internal sanity
            check on ``resample_ratio`` / ``--limitctf`` fails.
        SystemExit: via ``sys.exit()`` when no micrograph is detected.
    """
    progname = os.path.basename(sys.argv[0])
    usage = (
        progname
        + " micrographs_list --coords_dir=coords_dir --coords_suffix=coords_suffix"
        + " --coords_extension=coords_extension --coords_format=coords_format"
        + " --indir=input_dir --importctf=ctf_file --limitctf"
        + " --resample_ratio=resample_ratio --box_size=box_size"
        + " --outdir=outdir --outsuffix=outsuffix --micsuffix=micsuffix"
        + " --nameroot=nameroot --invert"
        + " --defocuserror=defocuserror --astigmatismerror=astigmatismerror"
    )

    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option("--coords_dir", type="string", default=".", help="<Coordinates Directory> Directory containing files with particle coordinates. (Default: current directory)")
    parser.add_option("--coords_suffix", type="string", default="", help="<Coordinates File Suffix> Suffix of coordinate files. For example '_ptcls'. ")
    parser.add_option("--coords_extension", type="string", default="box", help="<Coordinates File Extension> File extension of coordinate files. e.g 'box' for eman1, 'json' for eman2, ...")
    parser.add_option("--coords_format", type="string", default="eman1", help="<Coordinates File Format> Format of coordinates file: 'sparx', 'eman1', 'eman2', or 'spider'. The coordinates of sparx, eman2, and spider format is particle center. The coordinates of eman1 format is particle box conner associated with the original box size.")
    parser.add_option("--indir", type="string", default=".", help="<Micrograph Directory> Directory containing micrographs to be processed. (Default: current directory)")
    parser.add_option("--nameroot", type="string", default="", help="<Micrograph Root Name> Root name (Prefix) of micrographs to be processed.")
    parser.add_option("--micsuffix", type="string", default="hdf", help="<Micrograph Extension > A string denoting micrograph type. (Default 'hdf')")
    parser.add_option("--outdir", type="string", default=".", help="<Output Directory> Output directory (Default: current directory)")
    parser.add_option("--outsuffix", type="string", default="_ptcls", help="<Output File Suffix> Suffix for output stack. (Default '_ptcls')")
    parser.add_option("--importctf", type="string", default="", help="<CTER CTF File> File name with CTF parameters produced by sxcter.")
    parser.add_option("--box_size", type="int", default=256, help="<Box Size> x and y dimension in pixels of square area to be windowed. Pixel size after resampling is assumed when resample_ratio < 1.0 (Default 256)")
    parser.add_option("--invert", action="store_true", default=False, help="<Invert Contrast> Invert image contrast (recommended for cryo data) (Default, no contrast inversion)")
    parser.add_option("--resample_ratio", type="float", default=1.0, help="<Resample Ratio> Ratio of new to old image size (or old to new pixel size) for resampling. Valid range is 0.0 < resample_ratio <= 1.0. (Default: 1.0) (advanced)")
    parser.add_option("--limitctf", action="store_true", default=False, help="<Apply CTF-Limit Filter> Filter micrographs based on the CTF limit. It requires --importctf. (Default: no filter) (advanced)")
    parser.add_option("--defocuserror", type="float", default=1000000.0, help="<Defocus Error Limit> Exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocuserror percent. The error is computed as (std dev defocus)/defocus*100%. (Default: include all irrespective of error values.) (advanced)")
    parser.add_option("--astigmatismerror", type="float", default=360.0, help="<Astigmatism Error Limit> Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatismerror degrees. (Default: include all irrespective of error values.) (advanced)")
    # Hidden switch: lets a caller obtain the configured parser instead of
    # running the program.
    parser.add_option("--return_options", action="store_true", dest="return_options", default=False, help=SUPPRESS_HELP)

    (options, args) = parser.parse_args(args)

    if options.return_options:
        return parser

    # Set local constants
    box_size = options.box_size
    box_half = box_size // 2
    options.micsuffix = "." + options.micsuffix
    # Defocus error is given in percent on the command line; the astigmatism
    # error limit is used as given.
    cterr = [options.defocuserror / 100.0, options.astigmatismerror]

    check_options(options, progname)

    # --importctf defaults to "" (and may also be None), so "a CTF file is in
    # use" has to be a truthiness test; comparing against None alone would
    # treat the empty default as a file name.  Evaluated after check_options()
    # so that any adjustment it makes to the options is honored.
    use_ctf = bool(options.importctf)
    coords_format = options.coords_format.lower()
    extension_coord = options.coords_suffix + "." + options.coords_extension

    # Build micrograph basename list
    micnames = build_micnames(options, args)
    print_msg('Detected micrographs : %6d ...\n' % (len(micnames)))

    # If there is no micrographs, exit
    if len(micnames) == 0:
        print(usage)
        sys.exit()

    # Load CTFs, keyed by micrograph base name (taken from the last column of
    # each row).
    n_reject_defocus_error = 0
    if use_ctf:
        from utilities import generate_ctf

        ctfs0 = read_text_row(options.importctf)
        print_msg('Detected CTF entries : %6d ...\n' % (len(ctfs0)))

        ctfs = {}
        for ctf in ctfs0:
            basemic = baseroot(ctf[-1])
            defocus_error = ctf[8] / ctf[0]
            if defocus_error > cterr[0]:
                print_msg('Defocus error %f exceeds the threshold. Micrograph %s rejected.\n' % (defocus_error, basemic))
                n_reject_defocus_error += 1
            else:
                if ctf[10] > cterr[1]:
                    # Astigmatism error exceeds the limit: zero columns 6 and 7
                    # ("set to zero astigmatism", per the option help).
                    ctf[6] = 0.0
                    ctf[7] = 0.0
                ctfs[basemic] = ctf
        print_msg('Rejected micrographs by defocus error : %6d ...\n' % (n_reject_defocus_error))

    # Circular 2D mask handed to Util.infomask() when normalizing each window
    mask = model_circle(box_size // 2, box_size, box_size)

    # Prepare loop variables
    n_micrographs_process = 0
    n_micrographs_reject_no_micrograph = 0
    n_micrographs_reject_no_coordinates = 0
    n_micrographs_reject_no_cter_entry = 0
    n_total_coordinates_detect = 0
    n_total_coordinates_process = 0
    n_total_coordinates_reject_out_of_boundary = 0
    # Cut-off frequencies of the micrographs that were filtered by the CTF limit
    cutoffhistogram = []

    # Loop over micrographs
    for basename in micnames:
        # basename is name of micrograph minus the path and extension.
        # Micrograph and coordinates are assumed to share the same basename.
        f_mic = os.path.join(os.path.abspath(options.indir), basename + options.micsuffix)
        f_info = os.path.join(options.coords_dir, basename + extension_coord)

        # CHECKS: micrograph file, coordinates file, CTER entry
        if not os.path.exists(f_mic):
            print_msg(' Cannot read %s. Skipping %s ...\n' % (f_mic, basename))
            n_micrographs_reject_no_micrograph += 1
            continue

        if not os.path.exists(f_info):
            print_msg(' Cannot read %s. Skipping %s ...\n' % (f_info, basename))
            n_micrographs_reject_no_coordinates += 1
            continue

        if use_ctf:
            if basename not in ctfs:
                print_msg(' Is not listed in CTER results, skipping %s...\n' % (basename))
                n_micrographs_reject_no_cter_entry += 1
                continue
            ctf = ctfs[basename]

        n_micrographs_process += 1

        print_msg('\n')
        print_msg('Processing micrograph %s... Path: %s... Coordinates file %s\n' % (basename, f_mic, f_info))

        # Read coordinates according to the specified format and
        # make the coordinates the center of particle image
        if coords_format == 'sparx':
            coords = read_text_row(f_info)
        elif coords_format == 'eman1':
            # eman1 stores the box corner plus the box dimensions
            coords = [[c[0] + c[2] // 2, c[1] + c[3] // 2] for c in read_text_row(f_info)]
        elif coords_format == 'eman2':
            coords = [[c[0], c[1]] for c in js_open_dict(f_info)["boxes"]]
        elif coords_format == 'spider':
            coords = [[c[2], c[3]] for c in read_text_row(f_info)]
        else:
            # Expected to be unreachable once check_options() has run; raised
            # explicitly so the guard also holds under "python -O".
            raise AssertionError("Unsupported coordinates format: %s" % options.coords_format)

        # Load micrograph from the file
        immic = get_im(f_mic)

        # Calculate the new pixel size
        resample_ratio = options.resample_ratio
        if use_ctf:
            pixel_size_orig = ctf[3]
            if resample_ratio < 1.0:
                assert resample_ratio > 0.0
                new_pixel_size = pixel_size_orig / resample_ratio
                print_msg('Resample micrograph to pixel size %6.4f and window segments from resampled micrograph\n' % new_pixel_size)
            else:
                new_pixel_size = pixel_size_orig
            # Set ctf along with new pixel size in resampled micrograph
            ctf[3] = new_pixel_size
        elif resample_ratio < 1.0:
            assert resample_ratio > 0.0
            print_msg('Resample micrograph with ratio %6.4f and window segments from resampled micrograph\n' % resample_ratio)

        # Apply filters to micrograph
        fftip(immic)
        if options.limitctf:
            assert use_ctf, "--limitctf requires --importctf"

            # Cut off frequency components higher than CTF limit
            q1, _q2 = ctflimit(box_size, ctf[0], ctf[1], ctf[2], new_pixel_size)

            # This is absolute frequency of the CTF limit in the scale of
            # original micrograph
            if resample_ratio < 1.0:
                assert resample_ratio > 0.0
                # q1 = (pixel_size_orig / new_pixel_size) * q1 / float(box_size)
                q1 = resample_ratio * q1 / float(box_size)
            else:
                q1 = q1 / float(box_size)

            if q1 < 0.5:
                immic = filt_tanl(immic, q1, 0.01)
                cutoffhistogram.append(q1)

        # Cut off frequency components lower than the box size can express
        immic = fft(filt_gaussh(immic, resample_ratio / box_size))

        # Resample micrograph, map coordinates, and window segments from the
        # resampled micrograph using new coordinates.  After resampling by
        # resample_ratio, new pixel size will be
        # pixel_size / resample_ratio = new_pixel_size.
        # NOTE: 2015/04/13 Toshio Moriya
        # resample() efficiently takes care of the case resample_ratio = 1.0
        # but it does not set apix_*, even though it sets apix_* when
        # resample_ratio < 1.0.
        immic = resample(immic, resample_ratio)

        if options.invert:
            # Negate the image and add back twice stt[0] (presumably the image
            # average, which would keep the mean unchanged -- TODO confirm).
            stt = Util.infomask(immic, None, True)
            Util.mul_scalar(immic, -1.0)
            immic += 2 * stt[0]

        if use_ctf:
            ctf = generate_ctf(ctf)

        # Prepare loop variables
        nx = immic.get_xsize()
        ny = immic.get_ysize()
        x0 = nx // 2
        y0 = ny // 2
        print_msg('\n')
        print_msg('Micrograph size := (%6d, %6d)\n' % (nx, ny))

        otcl_images = "bdb:%s/" % options.outdir + basename + options.outsuffix
        ind = 0

        n_coordinates_reject_out_of_boundary = 0

        # Loop over coordinates
        for i, coord in enumerate(coords):
            source_x = int(coord[0])
            source_y = int(coord[1])

            x = source_x
            y = source_y

            if resample_ratio < 1.0:
                assert resample_ratio > 0.0
                x = int(x * resample_ratio)
                y = int(y * resample_ratio)

            inside = ((0 <= x - box_half) and (x + box_half <= nx) and
                      (0 <= y - box_half) and (y + box_half <= ny))
            if not inside:
                print_msg('Coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ....\n' % (i, x, y, box_size))
                n_coordinates_reject_out_of_boundary += 1
                continue

            # Util.window() is given the offset relative to the image center
            imw = Util.window(immic, box_size, box_size, 1, x - x0, y - y0)

            imw = ramp(imw)
            # Normalize with the first two statistics returned for the masked
            # area (presumably average and standard deviation -- TODO confirm).
            stat = Util.infomask(imw, mask, False)
            imw -= stat[0]
            imw /= stat[1]

            # NOTE: 2015/04/09 Toshio Moriya
            # ptcl_source_image might be redundant information ...
            # Consider re-organizing header entries...
            imw.set_attr("ptcl_source_image", f_mic)
            imw.set_attr("ptcl_source_coord_id", i)
            imw.set_attr("ptcl_source_coord", [source_x, source_y])
            imw.set_attr("resample_ratio", resample_ratio)

            # NOTE: 2015/04/13 Toshio Moriya
            # apix_* attributes are updated by resample() only when
            # resample_ratio != 1.0.  Make the header consistent by setting
            # apix_* = 1.0 regardless of options, so it is not passed down
            # the processing line.
            imw.set_attr("apix_x", 1.0)
            imw.set_attr("apix_y", 1.0)
            imw.set_attr("apix_z", 1.0)
            if use_ctf:
                imw.set_attr("ctf", ctf)
                imw.set_attr("ctf_applied", 0)
                imw.set_attr("pixel_size_orig", pixel_size_orig)
            # NOTE: 2015/04/13 Toshio Moriya
            # Pawel Comment: Micrograph is not supposed to have CTF header
            # info, so any CTF already present in the header is ignored here.

            imw.write_image(otcl_images, ind)
            ind += 1

        n_total_coordinates_detect += len(coords)
        n_total_coordinates_process += ind
        n_total_coordinates_reject_out_of_boundary += n_coordinates_reject_out_of_boundary

        # Print out the summary of this micrograph
        print_msg('\n')
        print_msg('Micrograph summary of coordinates...\n')
        print_msg('Detected : %4d\n' % (len(coords)))
        print_msg('Processed : %4d\n' % (ind))
        print_msg('Rejected by out of boundary : %4d\n' % (n_coordinates_reject_out_of_boundary))

    if options.limitctf:
        # Print out the summary of CTF-limit filtering
        print_msg('\n')
        print_msg('Global summary of CTF-limit filtering (--limitctf) ...\n')
        print_msg('Percentage of filtered micrographs: %8.2f\n' % (len(cutoffhistogram) * 100.0 / len(micnames)))

        lhist = 10
        if len(cutoffhistogram) >= lhist:
            from statistics import hist_list
            region, hist = hist_list(cutoffhistogram, lhist)
            print_msg(" Histogram of cut off frequency\n")
            print_msg(" ERROR number of frequencies\n")
            for lhx in range(lhist):
                print_msg(" %14.7f %7d\n" % (region[lhx], hist[lhx]))
        else:
            print_msg("The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced.\n" % (len(cutoffhistogram), lhist))

    # Print out the summary of all micrographs
    print_msg('\n')
    print_msg('Global summary of micrographs ...\n')
    print_msg('Detected : %6d\n' % (len(micnames)))
    print_msg('Processed : %6d\n' % (n_micrographs_process))
    print_msg('Rejected by no micrograph file : %6d\n' % (n_micrographs_reject_no_micrograph))
    print_msg('Rejected by no coordinates file : %6d\n' % (n_micrographs_reject_no_coordinates))
    print_msg('Rejected by no CTER entry : %6d\n' % (n_micrographs_reject_no_cter_entry))
    print_msg('\n')
    print_msg('Global summary of coordinates ...\n')
    print_msg('Detected : %6d\n' % (n_total_coordinates_detect))
    print_msg('Processed : %6d\n' % (n_total_coordinates_process))
    print_msg('Rejected by out of boundary : %6d\n' % (n_total_coordinates_reject_out_of_boundary))
    print_msg('\n')
def main(args): # parser1 = argparse.ArgumentParser(description='This program is used to window particles from a micrograph. The coordinates of the particles are given as input.') # parser1.add_argument() progname = os.path.basename(sys.argv[0]) usage = progname + " micrographs_list --coords_dir=coords_dir --coords_suffix=coords_suffix" + \ " --coords_extension=coords_extension --coords_format=coords_format" + \ " --indir=input_dir --importctf=ctf_file --limitctf" + \ " --resample_ratio=resample_ratio --box_size=box_size" + \ " --outdir=outdir --outsuffix=outsuffix --micsuffix=micsuffix" + \ " --nameroot=nameroot --invert" + \ " --defocuserror=defocuserror --astigmatismerror=astigmatismerror" parser = OptionParser(usage, version=SPARXVERSION) parser.add_option("--coords_dir", type="string", default=".", help="<Coordinates Directory> Directory containing files with particle coordinates. (Default: current directory)") parser.add_option("--coords_suffix", type="string", default="", help="<Coordinates File Suffix> Suffix of coordinate files. For example '_ptcls'. ") parser.add_option("--coords_extension", type="string", default="box", help="<Coordinates File Extension> File extension of coordinate files. e.g 'box' for eman1, 'json' for eman2, ...") parser.add_option("--coords_format", type="string", default="eman1", help="<Coordinates File Format> Format of coordinates file: 'sparx', 'eman1', 'eman2', or 'spider'. The coordinates of sparx, eman2, and spider format is particle center. The coordinates of eman1 format is particle box conner associated with the original box size.") parser.add_option("--indir", type="string", default=".", help="<Micrograph Directory> Directory containing micrographs to be processed. 
(Default: current directory)") parser.add_option("--nameroot", type="string", default="", help="<Micrograph Root Name> Root name (Prefix) of micrographs to be processed.") parser.add_option("--micsuffix", type="string", default="hdf", help="<Micrograph Extension > A string denoting micrograph type. (Default 'hdf')") parser.add_option("--outdir", type="string", default=".", help="<Output Directory> Output directory (Default: current directory)") parser.add_option("--outsuffix", type="string", default="_ptcls", help="<Output File Suffix> Suffix for output stack. (Default '_ptcls')") parser.add_option("--importctf", type="string", default="", help="<CTER CTF File> File name with CTF parameters produced by sxcter.") parser.add_option("--box_size", type="int", default=256, help="<Box Size> x and y dimension in pixels of square area to be windowed. Pixel size after resampling is assumed when resample_ratio < 1.0 (Default 256)") parser.add_option("--invert", action="store_true", default=False, help="<Invert Contrast> Invert image contrast (recommended for cryo data) (Default, no contrast inversion)") parser.add_option("--resample_ratio", type="float", default=1.0, help="<Resample Ratio> Ratio of new to old image size (or old to new pixel size) for resampling. Valid range is 0.0 < resample_ratio <= 1.0. (Default: 1.0) (advanced)") parser.add_option("--limitctf", action="store_true", default=False, help="<Apply CTF-Limit Filter> Filter micrographs based on the CTF limit. It requires --importctf. (Default: no filter) (advanced)") parser.add_option("--defocuserror", type="float", default=1000000.0, help="<Defocus Error Limit> Exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocuserror percent. The error is computed as (std dev defocus)/defocus*100%. (Default: include all irrespective of error values.) 
(advanced)" ) parser.add_option("--astigmatismerror", type="float", default=360.0, help="<Astigmatism Error Limit> Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatismerror degrees. (Default: include all irrespective of error values.) (advanced)") # must be switched off in production # parser.add_option("--use_latest_master_directory", action="store_true", dest="use_latest_master_directory", default=False) # # parser.add_option("--restart_section", type="string", default="", help="<restart section name> (no spaces) followed immediately by comma, followed immediately by generation to restart, example: \n--restart_section=candidate_class_averages,1 (Sections: restart, candidate_class_averages, reproducible_class_averages)") # parser.add_option("--stop_after_candidates", action="store_true", default=False, help="<stop_after_candidates> stops after the 'candidate_class_averages' section") # parser.add_option("--return_options", action="store_true", dest="return_options", default=False, help = SUPPRESS_HELP) (options, args) = parser.parse_args(args) if options.return_options: return parser # Set local constants box_size = options.box_size box_half = box_size // 2 options.micsuffix = "." + options.micsuffix cterr = [options.defocuserror/100.0, options.astigmatismerror] check_options(options, progname) extension_coord = options.coords_suffix + "." 
+ options.coords_extension # Build micrograph basename list micnames = build_micnames(options, args) print_msg('Detected micrographs : %6d ...\n' % (len(micnames))) # If there is no micrographs, exit if len(micnames) == 0: print usage sys.exit() # Load CTFs n_reject_defocus_error = 0 if options.importctf != None: ctfs0 = read_text_row(options.importctf) print_msg('Detected CTF entries : %6d ...\n' % (len(ctfs0))) ctfs={} for i in xrange(len(ctfs0)): ctf=ctfs0[i] basemic = baseroot(ctf[-1]) if(ctf[8]/ctf[0] > cterr[0]): print_msg('Defocus error %f exceeds the threshold. Micrograph %s rejected.\n' % (ctf[8]/ctf[0], basemic)) n_reject_defocus_error += 1 else: if(ctf[10] > cterr[1] ): ctf[6] = 0.0 ctf[7] = 0.0 ctfs[basemic] = ctf print_msg('Rejected micrographs by defocus error : %6d ...\n' % (n_reject_defocus_error)) # Create circular 2D mask for ... mask = model_circle(box_size//2, box_size, box_size) # Prepare loop variables n_micrographs_process = 0 n_micrographs_reject_no_micrograph = 0 n_micrographs_reject_no_coordinates = 0 n_micrographs_reject_no_cter_entry = 0 n_total_coordinates_detect = 0 n_total_coordinates_process = 0 n_total_coordinates_reject_out_of_boundary = 0 cutoffhistogram = [] #@ming compute the histogram for micrographs cut of by ctf limit. # Loop over micrographs for k in range(len(micnames)): # basename is name of micrograph minus the path and extension # Here, assuming micrograph and coordinates have the same file basename basename = micnames[k] f_mic = os.path.join(os.path.abspath(options.indir), basename + options.micsuffix) f_info = os.path.join(options.coords_dir, basename + extension_coord) # CHECKS: BEGIN # IF micrograph exists if not os.path.exists(f_mic): print_msg(' Cannot read %s. Skipping %s ...\n' % (f_mic, basename)) n_micrographs_reject_no_micrograph += 1 continue # IF coordinates file exists if not os.path.exists(f_info): print_msg(' Cannot read %s. 
Skipping %s ...\n' % (f_info, basename)) n_micrographs_reject_no_coordinates += 1 continue # IF micrograph is in CTER results if options.importctf != None: if basename not in ctfs: print_msg(' Is not listed in CTER results, skipping %s...\n' % (basename)) n_micrographs_reject_no_cter_entry += 1 continue else: ctf = ctfs[basename] # CHECKS: END n_micrographs_process += 1 print_msg('\n') print_msg('Processing micrograph %s... Path: %s... Coordinates file %s\n' % (basename, f_mic, f_info)) # Read coordinates according to the specified format and # make the coordinates the center of particle image if options.coords_format.lower() == 'sparx' : coords = read_text_row(f_info) elif options.coords_format.lower() == 'eman1': coords = read_text_row(f_info) for i in range(len(coords)): coords[i] = [coords[i][0] + coords[i][2]//2 ,coords[i][1] + coords[i][3]//2] elif options.coords_format.lower() == 'eman2': coords = js_open_dict(f_info)["boxes"] for i in range(len(coords)): coords[i] = [coords[i][0],coords[i][1]] elif options.coords_format.lower() == 'spider': coords = read_text_row(f_info) for i in range(len(coords)): coords[i] = [coords[i][2] ,coords[i][3]] else: assert(False) # Unreachable code # Load micrograph from the file immic = get_im(f_mic) # Calculate the new pixel size resample_ratio = options.resample_ratio if options.importctf != None: pixel_size_orig = ctf[3] if resample_ratio < 1.0: assert(resample_ratio > 0.0) new_pixel_size = pixel_size_orig / resample_ratio print_msg('Resample micrograph to pixel size %6.4f and window segments from resampled micrograph\n' % new_pixel_size) else: # assert(resample_ratio == 1.0) new_pixel_size = pixel_size_orig # Set ctf along with new pixel size in resampled micrograph ctf[3] = new_pixel_size else: assert(options.importctf == None) if resample_ratio < 1.0: assert(resample_ratio > 0.0) print_msg('Resample micrograph with ratio %6.4f and window segments from resampled micrograph\n' % resample_ratio) # else: # 
assert(resample_ratio == 1.0) # Apply filters to micrograph fftip(immic) if options.limitctf: assert(options.importctf != None) # Cut off frequency components higher than CTF limit q1, q2 = ctflimit(box_size,ctf[0],ctf[1],ctf[2],new_pixel_size) # This is absolute frequency of the CTF limit in the scale of original micrograph if resample_ratio < 1.0: assert(resample_ratio > 0.0) q1 = resample_ratio * q1 / float(box_size) # q1 = (pixel_size_orig / new_pixel_size) * q1/float(box_size) else: # assert(resample_ratio == 1.0) -> pixel_size_orig == new_pixel_size -> pixel_size_orig / new_pixel_size == 1.0 q1 = q1 / float(box_size) if q1 < 0.5: #@ming immic = filt_tanl(immic, q1, 0.01) cutoffhistogram.append(q1) # Cut off frequency components lower than the box size can express immic = fft(filt_gaussh( immic, resample_ratio/box_size )) # Resample micrograph, map coordinates, and window segments from resampled micrograph using new coordinates # after resampling by resample_ratio, new pixel size will be pixel_size/resample_ratio = new_pixel_size # NOTE: 2015/04/13 Toshio Moriya # resample() efficiently takes care of the case resample_ratio = 1.0 but # it does not set apix_*. Even though it sets apix_* when resample_ratio < 1.0 ... 
immic = resample(immic, resample_ratio) if options.invert: stt = Util.infomask(immic, None, True) Util.mul_scalar(immic, -1.0) immic += 2*stt[0] if options.importctf != None: from utilities import generate_ctf ctf = generate_ctf(ctf) # Prepare loop variables nx = immic.get_xsize() ny = immic.get_ysize() x0 = nx//2 y0 = ny//2 print_msg('\n') print_msg('Micrograph size := (%6d, %6d)\n' % (nx, ny)) otcl_images = "bdb:%s/" % options.outdir + basename + options.outsuffix ind = 0 n_coordinates_reject_out_of_boundary = 0 # Loop over coordinates for i in range(len(coords)): source_x = int(coords[i][0]) source_y = int(coords[i][1]) x = source_x y = source_y if resample_ratio < 1.0: assert(resample_ratio > 0.0) x = int(x * resample_ratio) y = int(y * resample_ratio) # else: # assert(resample_ratio == 1.0) if( (0 <= x - box_half) and ( x + box_half <= nx ) and (0 <= y - box_half) and ( y + box_half <= ny ) ): imw = Util.window(immic, box_size, box_size, 1, x-x0, y-y0) else: print_msg('Coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ....\n' % (i, x, y, box_size)) n_coordinates_reject_out_of_boundary += 1 continue imw = ramp(imw) stat = Util.infomask( imw, mask, False ) imw -= stat[0] imw /= stat[1] # NOTE: 2015/04/09 Toshio Moriya # ptcl_source_image might be redundant information ... # Consider re-organizing header entries... 
imw.set_attr("ptcl_source_image", f_mic) imw.set_attr("ptcl_source_coord_id", i) imw.set_attr("ptcl_source_coord", [source_x, source_y]) imw.set_attr("resample_ratio", resample_ratio) # NOTE: 2015/04/13 Toshio Moriya # apix_* attributes are updated by resample() only when resample_ratio != 1.0 # Let's make sure header info is consistent by setting apix_* = 1.0 # regardless of options, so it is not passed down the processing line imw.set_attr("apix_x", 1.0) imw.set_attr("apix_y", 1.0) imw.set_attr("apix_z", 1.0) if options.importctf != None: imw.set_attr("ctf",ctf) imw.set_attr("ctf_applied", 0) imw.set_attr("pixel_size_orig", pixel_size_orig) # imw.set_attr("apix_x", new_pixel_size) # imw.set_attr("apix_y", new_pixel_size) # imw.set_attr("apix_z", new_pixel_size) # NOTE: 2015/04/13 Toshio Moriya # Pawel Comment: Micrograph is not supposed to have CTF header info. # So, let's assume it does not exist & ignore its presence. # Note that resample() "correctly" updates pixel size of CTF header info if it exists # elif (imw.has_ctff()): # assert(options.importctf == None) # ctf_origin = imw.get_attr("ctf") # pixel_size_origin = round(ctf_origin.apix, 5) # Because SXCTER ouputs up to 5 digits # imw.set_attr("apix_x",pixel_size_origin) # imw.set_attr("apix_y",pixel_size_origin) # imw.set_attr("apix_z",pixel_size_origin) imw.write_image(otcl_images, ind) ind += 1 n_total_coordinates_detect += len(coords) n_total_coordinates_process += ind n_total_coordinates_reject_out_of_boundary += n_coordinates_reject_out_of_boundary # Print out the summary of this micrograph print_msg('\n') print_msg('Micrograph summary of coordinates...\n') print_msg('Detected : %4d\n' % (len(coords))) print_msg('Processed : %4d\n' % (ind)) print_msg('Rejected by out of boundary : %4d\n' % (n_coordinates_reject_out_of_boundary)) if options.limitctf: # Print out the summary of CTF-limit filtering print_msg('\n') print_msg('Global summary of CTF-limit filtering (--limitctf) ...\n') print_msg('Percentage 
of filtered micrographs: %8.2f\n' % (len(cutoffhistogram) * 100.0 / len(micnames))) lhist = 10 if len(cutoffhistogram) >= lhist: from statistics import hist_list region,hist = hist_list(cutoffhistogram, lhist) print_msg(" Histogram of cut off frequency\n") print_msg(" ERROR number of frequencies\n") for lhx in xrange(lhist): print_msg(" %14.7f %7d\n" % (region[lhx], hist[lhx])) # print_msg(" %10.3f %7d\n" % (region[lhx], hist[lhx])) else: print_msg("The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced.\n" % (len(cutoffhistogram), lhist)) # Print out the summary of all micrographs print_msg('\n') print_msg('Global summary of micrographs ...\n') print_msg('Detected : %6d\n' % (len(micnames))) print_msg('Processed : %6d\n' % (n_micrographs_process)) print_msg('Rejected by no micrograph file : %6d\n' % (n_micrographs_reject_no_micrograph)) print_msg('Rejected by no coordinates file : %6d\n' % (n_micrographs_reject_no_coordinates)) print_msg('Rejected by no CTER entry : %6d\n' % (n_micrographs_reject_no_cter_entry)) print_msg('\n') print_msg('Global summary of coordinates ...\n') print_msg('Detected : %6d\n' % (n_total_coordinates_detect)) print_msg('Processed : %6d\n' % (n_total_coordinates_process)) print_msg('Rejected by out of boundary : %6d\n' % (n_total_coordinates_reject_out_of_boundary)) print_msg('\n')
def main():
    """Window particle images out of micrographs (command-line entry point).

    Positional arguments (3 or 4):
        [input_micrograph_list_file] input_micrograph_pattern
        input_coordinates_pattern output_directory

    Outline of the work done here:
      1. Parse and validate the command line.
      2. On the main node, build the micrograph list (from the list file or
         by globbing the pattern), the coordinates-file list and, with
         --import_ctf, a dictionary of CTER rows keyed by micrograph name
         without path and extension.
      3. Keep only micrographs that have a coordinates file (and a CTER row
         when --import_ctf is given), then split them among the processes.
      4. For every assigned micrograph: optionally low-pass filter at the CTF
         limit, high-pass filter, resample, optionally invert contrast,
         window every in-bounds coordinate, normalise each particle and
         write it to a per-micrograph bdb stack.
      5. Print summaries on the main node and merge the per-micrograph
         stacks into ``bdb:<output_directory>/data`` by running e2bdb.py.

    The function never returns normally; it ends with ``sys.exit(0)``.
    """
    progname = os.path.basename(sys.argv[0])
    usage = progname + """ input_micrograph_list_file input_micrograph_pattern input_coordinates_pattern output_directory --coordinates_format --box_size=box_size --invert --import_ctf=ctf_file --limit_ctf --resample_ratio=resample_ratio --defocus_error=defocus_error --astigmatism_error=astigmatism_error

Window particles from micrographs in input list file. The coordinates of the particles should be given as input.
Please specify name pattern of input micrographs and coordinates files with a wild card (*). Use the wild card to indicate the place of micrograph ID (e.g. serial number, time stamp, and etc).
The name patterns must be enclosed by single quotes (') or double quotes ("). (Note: sxgui.py automatically adds single quotes (')).
BDB files can not be selected as input micrographs.

    sxwindow.py mic_list.txt ./mic*.hdf info/mic*_info.json particles --coordinates_format=eman2 --box_size=64 --invert --import_ctf=outdir_cter/partres/partres.txt

If micrograph list file name is not provided, all files matched with the micrograph name pattern will be processed.

    sxwindow.py ./mic*.hdf info/mic*_info.json particles --coordinates_format=eman2 --box_size=64 --invert --import_ctf=outdir_cter/partres/partres.txt

"""
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option(
        "--coordinates_format", type="string", default="eman1",
        help="format of input coordinates files: 'sparx', 'eman1', 'eman2', or 'spider'. "
             "the coordinates of sparx, eman2, and spider format is particle center. "
             "the coordinates of eman1 format is particle box conner associated with the original box size. "
             "(default eman1)")
    parser.add_option(
        "--box_size", type="int", default=256,
        help="x and y dimension of square area to be windowed (in pixels): "
             "pixel size after resampling is assumed when resample_ratio < 1.0 (default 256)")
    parser.add_option(
        "--invert", action="store_true", default=False,
        help="invert image contrast: recommended for cryo data (default False)")
    parser.add_option(
        "--import_ctf", type="string", default="",
        help="file name of sxcter output: normally partres.txt (default none)")
    parser.add_option(
        "--limit_ctf", action="store_true", default=False,
        help="filter micrographs based on the CTF limit: this option requires --import_ctf. (default False)")
    parser.add_option(
        "--resample_ratio", type="float", default=1.0,
        help="ratio of new to old image size (or old to new pixel size) for resampling: "
             "Valid range is 0.0 < resample_ratio <= 1.0. (default 1.0)")
    parser.add_option(
        "--defocus_error", type="float", default=1000000.0,
        help="defocus errror limit: exclude micrographs whose relative defocus error as estimated by sxcter "
             "is larger than defocus_error percent. the error is computed as (std dev defocus)/defocus*100%. "
             "(default 1000000.0)")
    parser.add_option(
        "--astigmatism_error", type="float", default=360.0,
        help="astigmatism error limit: Set to zero astigmatism for micrographs whose astigmatism angular error "
             "as estimated by sxcter is larger than astigmatism_error degrees. (default 360.0)")

    # Detect whether the program is running under (Open) MPI.
    RUNNING_UNDER_MPI = "OMPI_COMM_WORLD_SIZE" in os.environ
    main_node = 0

    if RUNNING_UNDER_MPI:
        # NOTE: these imports bind function-local names, so none of the mpi_*
        # names exist in a single-process run.  Every use below is therefore
        # guarded by RUNNING_UNDER_MPI.
        from mpi import mpi_init, mpi_finalize
        from mpi import MPI_COMM_WORLD, mpi_comm_rank, mpi_comm_size, mpi_barrier, mpi_reduce, MPI_INT, MPI_SUM
        mpi_init(0, [])
        myid = mpi_comm_rank(MPI_COMM_WORLD)
        number_of_processes = mpi_comm_size(MPI_COMM_WORLD)
    else:
        number_of_processes = 1
        myid = 0

    (options, args) = parser.parse_args(sys.argv[1:])

    # ------------------------------------------------------------------
    # Validate positional arguments.
    # Not a real loop: a block that can be left early with `break` as soon
    # as an error has been recorded.
    # ------------------------------------------------------------------
    mic_list_file_path = None
    mic_pattern = None
    coords_pattern = None
    error_status = None
    while True:
        if len(args) < 3 or len(args) > 4:
            error_status = ("Please check usage for number of arguments.\n Usage: " + usage + "\n" + "Please run %s -h for help." % (progname), getframeinfo(currentframe()))
            break

        if len(args) == 3:
            mic_pattern = args[0]
            coords_pattern = args[1]
            out_dir = args[2]
        else:  # len(args) == 4
            mic_list_file_path = args[0]
            mic_pattern = args[1]
            coords_pattern = args[2]
            out_dir = args[3]

        if mic_list_file_path is not None:
            if os.path.splitext(mic_list_file_path)[1] != ".txt":
                error_status = ("Extension of input micrograph list file must be \".txt\". Please check input_micrograph_list_file argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
                break

        # Fixed: the 4-character prefix used to be compared against the
        # 3-character string "bdb", so this check could never trigger.
        if mic_pattern.lower().startswith("bdb:"):
            error_status = ("BDB file can not be selected as input micrographs. Please convert the format, and restart the program. Run %s -h for help." % (progname), getframeinfo(currentframe()))
            break

        if mic_pattern.find("*") == -1:
            error_status = ("Input micrograph file name pattern must contain wild card (*). Please check input_micrograph_pattern argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
            break

        if coords_pattern.find("*") == -1:
            error_status = ("Input coordinates file name pattern must contain wild card (*). Please check input_coordinates_pattern argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
            break

        if myid == main_node:
            if os.path.exists(out_dir):
                error_status = ("Output directory exists. Please change the name and restart the program.", getframeinfo(currentframe()))
                break

        break
    if_error_then_all_processes_exit_program(error_status)

    # Check invalid conditions of options
    check_options(options, progname)

    # ------------------------------------------------------------------
    # Build the micrograph list (main node only), then share it.
    # ------------------------------------------------------------------
    mic_name_list = None
    error_status = None
    if myid == main_node:
        if mic_list_file_path is not None:
            print("Loading micrograph list from %s file ..." % (mic_list_file_path))
            mic_name_list = read_text_file(mic_list_file_path)
            # Fixed: this message used to be printed only for an EMPTY list
            # (indexing [0] of nothing) and its format string had no %s.
            if len(mic_name_list) > 0:
                print("Directory of first micrograph entry is %s" % (os.path.dirname(mic_name_list[0])))
        else:  # mic_list_file_path is None
            print("Generating micrograph list in %s directory..." % (os.path.dirname(mic_pattern)))
            mic_name_list = glob.glob(mic_pattern)

        if len(mic_name_list) == 0:
            error_status = ("No micrograph file is found. Please check input_micrograph_pattern and/or input_micrograph_list_file argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
        else:
            print("Found %d microgarphs" % len(mic_name_list))
    if_error_then_all_processes_exit_program(error_status)
    if RUNNING_UNDER_MPI:
        mic_name_list = wrap_mpi_bcast(mic_name_list, main_node)

    # ------------------------------------------------------------------
    # Build the coordinates-file list (main node only), then share it.
    # ------------------------------------------------------------------
    coords_name_list = None
    error_status = None
    if myid == main_node:
        coords_name_list = glob.glob(coords_pattern)
        if len(coords_name_list) == 0:
            error_status = ("No coordinates file is found. Please check input_coordinates_pattern argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
    if_error_then_all_processes_exit_program(error_status)
    if RUNNING_UNDER_MPI:
        coords_name_list = wrap_mpi_bcast(coords_name_list, main_node)

    # ------------------------------------------------------------------
    # Column indices of one row of the sxcter output file.
    # All processes must have access to these indices.
    # Indices 0..7 must be the same as in the ctf object format.
    # ------------------------------------------------------------------
    if options.import_ctf:
        (idx_cter_def,           # defocus [um]
         idx_cter_cs,            # Cs [mm]
         idx_cter_vol,           # voltage [kV]
         idx_cter_apix,          # pixel size [A]
         idx_cter_bfactor,       # B-factor [A^2]
         idx_cter_ac,            # amplitude contrast [%]
         idx_cter_astig_amp,     # astigmatism amplitude [um]
         idx_cter_astig_ang,     # astigmatism angle [degree]
         idx_cter_sd_def,        # std dev of defocus [um]
         idx_cter_sd_astig_amp,  # std dev of ast amp [A]
         idx_cter_sd_astig_ang,  # std dev of ast angle [degree]
         idx_cter_cv_def,        # coefficient of variation of defocus [%]
         idx_cter_cv_astig_amp,  # coefficient of variation of ast amp [%]
         idx_cter_spectra_diff,  # average of differences between with- and without-astig. experimental 1D spectra at extrema
         idx_cter_error_def,     # frequency at which signal drops by 50% due to estimated error of defocus alone [1/A]
         idx_cter_error_astig,   # frequency at which signal drops by 50% due to estimated error of defocus and astigmatism [1/A]
         idx_cter_error_ctf,     # limit frequency by CTF error [1/A]
         idx_cter_mic_name,      # micrograph name
         n_idx_cter) = range(19)  # n_idx_cter == 18 == number of columns

    # ------------------------------------------------------------------
    # Prepare loop variables
    # ------------------------------------------------------------------
    mic_basename_pattern = os.path.basename(mic_pattern)  # file pattern without path
    mic_baseroot_pattern = os.path.splitext(mic_basename_pattern)[0]  # file pattern without path and extension
    coords_format = options.coordinates_format.lower()
    box_size = options.box_size
    box_half = box_size // 2
    # Circular 2D mask passed to Util.infomask when normalising particle images
    mask2d = model_circle(box_size // 2, box_size, box_size)
    resample_ratio = options.resample_ratio

    n_mic_process = 0
    n_mic_reject_no_coords = 0
    n_mic_reject_no_cter_entry = 0
    n_global_coords_detect = 0
    n_global_coords_process = 0
    n_global_coords_reject_out_of_boundary = 0

    serial_id_list = []
    error_status = None
    # Not a real while: an `if` with the opportunity to use `break` when
    # errors need to be reported.
    while myid == main_node:
        # NOTE: 2016/05/24 Toshio Moriya
        # The path in mic_pattern and in the entries of mic_name_list is
        # ignored when creating the serial ID; only the basename (file name)
        # of the micrograph path has to match.
        #
        # Break the micrograph name pattern into prefix and suffix to find
        # where the serial id starts and ends inside each file name.
        mic_basename_tokens = mic_basename_pattern.split('*')
        serial_id_head_index = len(mic_basename_tokens[0])
        for mic_name in mic_name_list:
            mic_basename = os.path.basename(mic_name)
            serial_id_tail_index = mic_basename.index(mic_basename_tokens[1])
            serial_id_list.append(mic_basename[serial_id_head_index:serial_id_tail_index])
        del mic_name_list  # Do not need this anymore

        # Load CTFs if necessary
        if options.import_ctf:
            ctf_list = read_text_row(options.import_ctf)

            if len(ctf_list) == 0:
                error_status = ("No CTF entry is found in %s. Please check --import_ctf option. Run %s -h for help." % (options.import_ctf, progname), getframeinfo(currentframe()))
                break

            if len(ctf_list[0]) != n_idx_cter:
                error_status = ("Number of columns (%d) must be %d in %s. The format might be old. Please run sxcter.py again." % (len(ctf_list[0]), n_idx_cter, options.import_ctf), getframeinfo(currentframe()))
                break

            ctf_dict = {}
            defocus_error_limit = options.defocus_error / 100.0  # option is given in percent
            astigmatism_error_limit = options.astigmatism_error
            for ctf_params in ctf_list:
                assert(len(ctf_params) == n_idx_cter)
                # mic_baseroot is name of micrograph minus the path and extension
                mic_baseroot = os.path.splitext(os.path.basename(ctf_params[idx_cter_mic_name]))[0]
                defocus_error = ctf_params[idx_cter_sd_def] / ctf_params[idx_cter_def]
                if defocus_error > defocus_error_limit:
                    print("Defocus error %f exceeds the threshold. Micrograph %s is rejected." % (defocus_error, mic_baseroot))
                    continue
                if ctf_params[idx_cter_sd_astig_ang] > astigmatism_error_limit:
                    # Astigmatism estimate is too uncertain: treat as no astigmatism
                    ctf_params[idx_cter_astig_amp] = 0.0
                    ctf_params[idx_cter_astig_ang] = 0.0
                ctf_dict[mic_baseroot] = ctf_params
            del ctf_list  # Do not need this anymore

        break
    if_error_then_all_processes_exit_program(error_status)

    # Cut-off frequencies of the micrographs filtered by --limit_ctf;
    # used for the histogram in the summary.
    cutoff_histogram = []

    # ------------------------------------------------------------------
    # Keep only micrographs that can actually be processed (main node only)
    # ------------------------------------------------------------------
    restricted_serial_id_list = []
    if myid == main_node:
        coords_name_set = set(coords_name_list)  # O(1) membership tests
        for serial_id in serial_id_list:
            # mic_baseroot is name of micrograph minus the path and extension
            mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
            coords_name = coords_pattern.replace("*", serial_id)

            if coords_name not in coords_name_set:
                print(" Cannot read %s. Skipping %s ..." % (coords_name, mic_baseroot))
                n_mic_reject_no_coords += 1
                continue

            if options.import_ctf and mic_baseroot not in ctf_dict:
                print(" Is not listed in CTER results. Skipping %s ..." % (mic_baseroot))
                n_mic_reject_no_cter_entry += 1
                continue

            n_mic_process += 1
            restricted_serial_id_list.append(serial_id)

    if myid != main_node:
        if options.import_ctf:
            ctf_dict = None  # placeholder; replaced by the broadcast below

    # NOTE(review): on non-main nodes the list is still empty here, so they
    # always build an error tuple; this relies on
    # if_error_then_all_processes_exit_program honouring only the main
    # node's status - confirm against that helper.
    error_status = None
    if len(restricted_serial_id_list) < number_of_processes:
        error_status = ('Number of processes (%d) supplied by --np in mpirun cannot be greater than %d (number of micrographs that satisfy all criteria to be processed) ' % (number_of_processes, len(restricted_serial_id_list)), getframeinfo(currentframe()))
    if_error_then_all_processes_exit_program(error_status)

    # Keep a copy of the original output directory where the final bdb will be created
    original_out_dir = out_dir

    if RUNNING_UNDER_MPI:
        mpi_barrier(MPI_COMM_WORLD)
        restricted_serial_id_list = wrap_mpi_bcast(restricted_serial_id_list, main_node)
        restricted_serial_id_list_not_sliced = restricted_serial_id_list
        mic_start, mic_end = MPI_start_end(len(restricted_serial_id_list), number_of_processes, myid)
        restricted_serial_id_list = restricted_serial_id_list[mic_start:mic_end]

        if options.import_ctf:
            ctf_dict = wrap_mpi_bcast(ctf_dict, main_node)

        # One sub-directory of out_dir per process
        out_dir = os.path.join(out_dir, "%03d" % myid)
    else:
        # Single process: it handles the complete list.  The unsliced list is
        # needed below for the summaries and the final bdb merge.
        restricted_serial_id_list_not_sliced = restricted_serial_id_list

    if myid == main_node:
        print("Micrographs processed by main process (including percent complete):")
        len_processed_by_main_node_divided_by_100 = len(restricted_serial_id_list) / 100.0

    if options.import_ctf:
        from utilities import generate_ctf

    # ------------------------------------------------------------------
    # Main (parallel) loop over the micrographs assigned to this process
    # ------------------------------------------------------------------
    for my_idx, serial_id in enumerate(restricted_serial_id_list):
        mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
        mic_name = mic_pattern.replace("*", serial_id)
        coords_name = coords_pattern.replace("*", serial_id)

        if myid == main_node:
            # A single formatted string, so the output does not turn into a
            # tuple repr where `print` is a statement.
            print("%s  ---> % 2.2f%%" % (mic_name, my_idx / len_processed_by_main_node_divided_by_100))

        mic_img = get_im(mic_name)

        # Read coordinates according to the specified format and
        # make the coordinates the center of particle image
        if coords_format == "sparx":
            coords_list = read_text_row(coords_name)
        elif coords_format == "eman1":
            # eman1 stores the box corner plus the box dimensions
            coords_list = [[box[0] + box[2] // 2, box[1] + box[3] // 2] for box in read_text_row(coords_name)]
        elif coords_format == "eman2":
            coords_list = [[box[0], box[1]] for box in js_open_dict(coords_name)["boxes"]]
        elif coords_format == "spider":
            coords_list = [[row[2], row[3]] for row in read_text_row(coords_name)]
        # else: unreachable, assuming check_options() validated the format

        # Calculate the new pixel size
        if options.import_ctf:
            ctf_params = ctf_dict[mic_baseroot]
            pixel_size_origin = ctf_params[idx_cter_apix]

            if resample_ratio < 1.0:
                new_pixel_size = pixel_size_origin / resample_ratio
                print("Resample micrograph to pixel size %6.4f and window segments from resampled micrograph." % new_pixel_size)
            else:
                new_pixel_size = pixel_size_origin

            # Set ctf along with new pixel size in resampled micrograph
            ctf_params[idx_cter_apix] = new_pixel_size
        elif resample_ratio < 1.0:
            print("Resample micrograph with ratio %6.4f and window segments from resampled micrograph." % resample_ratio)

        # Apply filters to micrograph
        fftip(mic_img)
        if options.limit_ctf:
            # Cut off frequency components higher than CTF limit.
            # q1 is the absolute frequency of the CTF limit in the scale of
            # the original micrograph; the second return value is not used.
            q1, _unused_q2 = ctflimit(box_size, ctf_params[idx_cter_def], ctf_params[idx_cter_cs], ctf_params[idx_cter_vol], new_pixel_size)
            if resample_ratio < 1.0:
                # resample_ratio == pixel_size_origin / new_pixel_size
                q1 = resample_ratio * q1 / float(box_size)
            else:
                q1 = q1 / float(box_size)
            if q1 < 0.5:
                mic_img = filt_tanl(mic_img, q1, 0.01)
                cutoff_histogram.append(q1)

        # Cut off frequency components lower than the box size can express
        mic_img = fft(filt_gaussh(mic_img, resample_ratio / box_size))

        # Resample micrograph, map coordinates, and window segments from the
        # resampled micrograph using the new coordinates.  After resampling
        # by resample_ratio the pixel size is pixel_size / resample_ratio.
        # NOTE: 2015/04/13 Toshio Moriya
        # resample() efficiently takes care of the case resample_ratio = 1.0
        # but it does not set apix_* then, although it does so when
        # resample_ratio < 1.0 ...
        mic_img = resample(mic_img, resample_ratio)

        if options.invert:
            mic_stats = Util.infomask(mic_img, None, True)  # mic_stats[0:mean, 1:SD, 2:min, 3:max]
            Util.mul_scalar(mic_img, -1.0)
            mic_img += 2 * mic_stats[0]

        if options.import_ctf:
            # indexes 0 to 7 (idx_cter_def to idx_cter_astig_ang) must be
            # same in cter format & ctf object format.
            ctf_obj = generate_ctf(ctf_params)

        # Prepare loop variables
        nx = mic_img.get_xsize()
        ny = mic_img.get_ysize()
        x0 = nx // 2
        y0 = ny // 2

        n_coords_reject_out_of_boundary = 0
        local_stack_name = "bdb:%s#" % out_dir + mic_baseroot + '_ptcls'
        local_particle_id = 0  # can be different from coordinates_id

        # Loop over coordinates
        for coords_id, coords in enumerate(coords_list):
            source_x = int(coords[0])
            source_y = int(coords[1])
            x = source_x
            y = source_y
            if resample_ratio < 1.0:
                x = int(x * resample_ratio)
                y = int(y * resample_ratio)

            box_fits = ((0 <= x - box_half) and (x + box_half <= nx) and
                        (0 <= y - box_half) and (y + box_half <= ny))
            if not box_fits:
                print("In %s, coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ..." % (mic_baseroot, coords_id, x, y, box_size))
                n_coords_reject_out_of_boundary += 1
                continue

            particle_img = Util.window(mic_img, box_size, box_size, 1, x - x0, y - y0)
            particle_img = ramp(particle_img)
            particle_stats = Util.infomask(particle_img, mask2d, False)  # particle_stats[0:mean, 1:SD, 2:min, 3:max]
            particle_img -= particle_stats[0]
            particle_img /= particle_stats[1]

            # NOTE: 2015/04/09 Toshio Moriya
            # ptcl_source_image might be redundant information ...
            # Consider re-organizing header entries...
            particle_img.set_attr("ptcl_source_image", mic_name)
            particle_img.set_attr("ptcl_source_coord_id", coords_id)
            particle_img.set_attr("ptcl_source_coord", [source_x, source_y])
            particle_img.set_attr("resample_ratio", resample_ratio)

            # NOTE: 2015/04/13 Toshio Moriya
            # apix_* attributes are updated by resample() only when
            # resample_ratio != 1.0.  Keep the header consistent by setting
            # apix_* = 1.0 regardless of options, so it is not passed down
            # the processing line.
            particle_img.set_attr("apix_x", 1.0)
            particle_img.set_attr("apix_y", 1.0)
            particle_img.set_attr("apix_z", 1.0)
            if options.import_ctf:
                particle_img.set_attr("ctf", ctf_obj)
                particle_img.set_attr("ctf_applied", 0)
                particle_img.set_attr("pixel_size_origin", pixel_size_origin)
            # NOTE: 2015/04/13 Toshio Moriya
            # Pawel Comment: Micrograph is not supposed to have CTF header
            # info, so it is assumed not to exist and is ignored.

            particle_img.write_image(local_stack_name, local_particle_id)
            local_particle_id += 1

        n_global_coords_detect += len(coords_list)
        n_global_coords_process += local_particle_id
        n_global_coords_reject_out_of_boundary += n_coords_reject_out_of_boundary

        # Release the data base of local stack from this process
        # so that the subprocess can access to the data base
        db_close_dict(local_stack_name)

    # ------------------------------------------------------------------
    # Collect statistics and print the summaries
    # ------------------------------------------------------------------
    if RUNNING_UNDER_MPI:
        if options.import_ctf:
            if options.limit_ctf:
                cutoff_histogram = wrap_mpi_gatherv(cutoff_histogram, main_node)

    if myid == main_node:
        if options.limit_ctf:
            # Print out the summary of CTF-limit filtering
            print(" ")
            print("Global summary of CTF-limit filtering (--limit_ctf) ...")
            print("Percentage of filtered micrographs: %8.2f\n" % (len(cutoff_histogram) * 100.0 / len(restricted_serial_id_list_not_sliced)))

            n_bins = 10
            if len(cutoff_histogram) >= n_bins:
                from statistics import hist_list
                cutoff_region, cutoff_counts = hist_list(cutoff_histogram, n_bins)
                print(" Histogram of cut-off frequency")
                print(" cut-off counts")
                for bin_id in range(n_bins):
                    print(" %14.7f %7d" % (cutoff_region[bin_id], cutoff_counts[bin_id]))
            else:
                print("The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced." % (len(cutoff_histogram), n_bins))

    if RUNNING_UNDER_MPI:
        n_mic_process = mpi_reduce(n_mic_process, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
        n_mic_reject_no_coords = mpi_reduce(n_mic_reject_no_coords, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
        n_mic_reject_no_cter_entry = mpi_reduce(n_mic_reject_no_cter_entry, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
        n_global_coords_detect = mpi_reduce(n_global_coords_detect, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
        n_global_coords_process = mpi_reduce(n_global_coords_process, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
        n_global_coords_reject_out_of_boundary = mpi_reduce(n_global_coords_reject_out_of_boundary, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)

    # Print out the summary of all micrographs
    if main_node == myid:
        print(" ")
        print("Global summary of micrographs ...")
        print("Detected : %6d" % (len(restricted_serial_id_list_not_sliced)))
        print("Processed : %6d" % (n_mic_process))
        print("Rejected by no coordinates file : %6d" % (n_mic_reject_no_coords))
        print("Rejected by no CTER entry : %6d" % (n_mic_reject_no_cter_entry))
        print(" ")
        print("Global summary of coordinates ...")
        print("Detected : %6d" % (n_global_coords_detect))
        print("Processed : %6d" % (n_global_coords_process))
        print("Rejected by out of boundary : %6d" % (n_global_coords_reject_out_of_boundary))

    if RUNNING_UNDER_MPI:
        # Every process must have finished writing its stacks before merging
        mpi_barrier(MPI_COMM_WORLD)

    # ------------------------------------------------------------------
    # Merge all per-micrograph stacks into one virtual stack (main node)
    # ------------------------------------------------------------------
    if main_node == myid:
        import time
        time.sleep(1)
        print("\n Creating bdb:%s/data\n" % original_out_dir)
        for proc_i in range(number_of_processes):
            mic_start, mic_end = MPI_start_end(len(restricted_serial_id_list_not_sliced), number_of_processes, proc_i)
            for serial_id in restricted_serial_id_list_not_sliced[mic_start:mic_end]:
                e2bdb_command = "e2bdb.py "
                mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
                if RUNNING_UNDER_MPI:
                    # Stacks were written to the sub-directory of process proc_i
                    e2bdb_command += "bdb:" + os.path.join(original_out_dir, "%03d/" % proc_i) + mic_baseroot + "_ptcls "
                else:
                    e2bdb_command += "bdb:" + os.path.join(original_out_dir, mic_baseroot + "_ptcls ")
                e2bdb_command += " --appendvstack=bdb:%s/data 1>/dev/null" % original_out_dir
                cmdexecute(e2bdb_command, printing_on_success=False)
        print("Done!\n")

    if RUNNING_UNDER_MPI:
        mpi_barrier(MPI_COMM_WORLD)
        mpi_finalize()

    sys.stdout.flush()
    sys.exit(0)