Example #1
0
def ali3d_MPI(stack, ref_vol, outdir, maskfile = None, ir = 1, ou = -1, rs = 1, 
	    xr = "4 2 2 1", yr = "-1", ts = "1 1 0.5 0.25", delta = "10 6 4 4", an = "-1",
	    center = 0, maxit = 5, term = 95, CTF = False, fourvar = False, snr = 1.0,  ref_a = "S", sym = "c1", 
	    sort=True, cutoff=999.99, pix_cutoff="0", two_tail=False, model_jump="1 1 1 1 1", restart=False, save_half=False,
	    protos=None, oplane=None, lmask=-1, ilmask=-1, findseam=False, vertstep=None, hpars="-1", hsearch="73.0 170.0",
	    full_output = False, compare_repro = False, compare_ref_free = "-1", ref_free_cutoff= "-1 -1 -1 -1",
	    wcmask = None, debug = False, recon_pad = 4):

	from alignment      import Numrinit, prepare_refrings
	from utilities      import model_circle, get_image, drop_image, get_input_from_string
	from utilities      import bcast_list_to_all, bcast_number_to_all, reduce_EMData_to_root, bcast_EMData_to_all 
	from utilities      import send_attr_dict
	from utilities      import get_params_proj, file_type
	from fundamentals   import rot_avg_image
	import os
	import types
	from utilities      import print_begin_msg, print_end_msg, print_msg
	from mpi	    import mpi_bcast, mpi_comm_size, mpi_comm_rank, MPI_FLOAT, MPI_COMM_WORLD, mpi_barrier, mpi_reduce
	from mpi	    import mpi_reduce, MPI_INT, MPI_SUM, mpi_finalize
	from filter	 import filt_ctf
	from projection     import prep_vol, prgs
	from statistics     import hist_list, varf3d_MPI, fsc_mask
	from numpy	  import array, bincount, array2string, ones

	number_of_proc = mpi_comm_size(MPI_COMM_WORLD)
	myid	   = mpi_comm_rank(MPI_COMM_WORLD)
	main_node = 0
	if myid == main_node:
		if os.path.exists(outdir):  ERROR('Output directory exists, please change the name and restart the program', "ali3d_MPI", 1)
		os.mkdir(outdir)
	mpi_barrier(MPI_COMM_WORLD)

	if debug:
		from time import sleep
		while not os.path.exists(outdir):
			print  "Node ",myid,"  waiting..."
			sleep(5)

		info_file = os.path.join(outdir, "progress%04d"%myid)
		finfo = open(info_file, 'w')
	else:
		finfo = None
	mjump = get_input_from_string(model_jump)
	xrng	= get_input_from_string(xr)
	if  yr == "-1":  yrng = xrng
	else	  :  yrng = get_input_from_string(yr)
	step	= get_input_from_string(ts)
	delta       = get_input_from_string(delta)
	ref_free_cutoff = get_input_from_string(ref_free_cutoff)	
	pix_cutoff = get_input_from_string(pix_cutoff)
	
	lstp = min(len(xrng), len(yrng), len(step), len(delta))
	if an == "-1":
		an = [-1] * lstp
	else:
		an = get_input_from_string(an)
	# make sure pix_cutoff is set for all iterations
	if len(pix_cutoff)<lstp:
		for i in xrange(len(pix_cutoff),lstp):
			pix_cutoff.append(pix_cutoff[-1])
	# don't waste time on sub-pixel alignment for low-resolution ang incr
	for i in range(len(step)):
		if (delta[i] > 4 or delta[i] == -1) and step[i] < 1:
			step[i] = 1

	first_ring  = int(ir)
	rstep       = int(rs)
	last_ring   = int(ou)
	max_iter    = int(maxit)
	center      = int(center)

	nrefs   = EMUtil.get_image_count( ref_vol )
	nmasks = 0
	if maskfile:
		# read number of masks within each maskfile (mc)
		nmasks   = EMUtil.get_image_count( maskfile )
		# open masks within maskfile (mc)
		maskF   = EMData.read_images(maskfile, xrange(nmasks))
	vol     = EMData.read_images(ref_vol, xrange(nrefs))
	nx      = vol[0].get_xsize()

	## make sure box sizes are the same
	if myid == main_node:
		im=EMData.read_images(stack,[0])
		bx = im[0].get_xsize()
		if bx!=nx:
			print_msg("Error: Stack box size (%i) differs from initial model (%i)\n"%(bx,nx))
			sys.exit()
		del im,bx
	
	# for helical processing:
	helicalrecon = False
	if protos is not None or hpars != "-1" or findseam is True:
		helicalrecon = True
		# if no out-of-plane param set, use 5 degrees
		if oplane is None:
			oplane=5.0
	if protos is not None:
		proto = get_input_from_string(protos)
		if len(proto) != nrefs:
			print_msg("Error: insufficient protofilament numbers supplied")
			sys.exit()
	if hpars != "-1":
		hpars = get_input_from_string(hpars)
		if len(hpars) != 2*nrefs:
			print_msg("Error: insufficient helical parameters supplied")
			sys.exit()
	## create helical parameter file for helical reconstruction
	if helicalrecon is True and myid == main_node:
		from hfunctions import createHpar
		# create initial helical parameter files
		dp=[0]*nrefs
		dphi=[0]*nrefs
		vdp=[0]*nrefs
		vdphi=[0]*nrefs
		for iref in xrange(nrefs):
			hpar = os.path.join(outdir,"hpar%02d.spi"%(iref))
			params = False
			if hpars != "-1":
				# if helical parameters explicitly given, set twist & rise
				params = [float(hpars[iref*2]),float(hpars[(iref*2)+1])]
			dp[iref],dphi[iref],vdp[iref],vdphi[iref] = createHpar(hpar,proto[iref],params,vertstep)

	# get values for helical search parameters
	hsearch = get_input_from_string(hsearch)
	if len(hsearch) != 2:
		print_msg("Error: specify outer and inner radii for helical search")
		sys.exit()

	if last_ring < 0 or last_ring > int(nx/2)-2 :	last_ring = int(nx/2) - 2

	if myid == main_node:
	#	import user_functions
	#	user_func = user_functions.factory[user_func_name]

		print_begin_msg("ali3d_MPI")
		print_msg("Input stack		 : %s\n"%(stack))
		print_msg("Reference volume	    : %s\n"%(ref_vol))	
		print_msg("Output directory	    : %s\n"%(outdir))
		if nmasks > 0:
			print_msg("Maskfile (number of masks)  : %s (%i)\n"%(maskfile,nmasks))
		print_msg("Inner radius		: %i\n"%(first_ring))
		print_msg("Outer radius		: %i\n"%(last_ring))
		print_msg("Ring step		   : %i\n"%(rstep))
		print_msg("X search range	      : %s\n"%(xrng))
		print_msg("Y search range	      : %s\n"%(yrng))
		print_msg("Translational step	  : %s\n"%(step))
		print_msg("Angular step		: %s\n"%(delta))
		print_msg("Angular search range	: %s\n"%(an))
		print_msg("Maximum iteration	   : %i\n"%(max_iter))
		print_msg("Center type		 : %i\n"%(center))
		print_msg("CTF correction	      : %s\n"%(CTF))
		print_msg("Signal-to-Noise Ratio       : %f\n"%(snr))
		print_msg("Reference projection method : %s\n"%(ref_a))
		print_msg("Symmetry group	      : %s\n"%(sym))
		print_msg("Fourier padding for 3D      : %i\n"%(recon_pad))
		print_msg("Number of reference models  : %i\n"%(nrefs))
		print_msg("Sort images between models  : %s\n"%(sort))
		print_msg("Allow images to jump	: %s\n"%(mjump))
		print_msg("CC cutoff standard dev      : %f\n"%(cutoff))
		print_msg("Two tail cutoff	     : %s\n"%(two_tail))
		print_msg("Termination pix error       : %f\n"%(term))
		print_msg("Pixel error cutoff	  : %s\n"%(pix_cutoff))
		print_msg("Restart		     : %s\n"%(restart))
		print_msg("Full output		 : %s\n"%(full_output))
		print_msg("Compare reprojections       : %s\n"%(compare_repro))
		print_msg("Compare ref free class avgs : %s\n"%(compare_ref_free))
		print_msg("Use cutoff from ref free    : %s\n"%(ref_free_cutoff))
		if protos:
			print_msg("Protofilament numbers	: %s\n"%(proto))
			print_msg("Using helical search range   : %s\n"%hsearch) 
		if findseam is True:
			print_msg("Using seam-based reconstruction\n")
		if hpars != "-1":
			print_msg("Using hpars		  : %s\n"%hpars)
		if vertstep != None:
			print_msg("Using vertical step    : %.2f\n"%vertstep)
		if save_half is True:
			print_msg("Saving even/odd halves\n")
		for i in xrange(100) : print_msg("*")
		print_msg("\n\n")
	if maskfile:
		if type(maskfile) is types.StringType: mask3D = get_image(maskfile)
		else:				  mask3D = maskfile
	else: mask3D = model_circle(last_ring, nx, nx, nx)

	numr	= Numrinit(first_ring, last_ring, rstep, "F")
	mask2D  = model_circle(last_ring,nx,nx) - model_circle(first_ring,nx,nx)

	fscmask = model_circle(last_ring,nx,nx,nx)
	if CTF:
		from filter	 import filt_ctf
	from reconstruction_rjh import rec3D_MPI_noCTF

	if myid == main_node:
		active = EMUtil.get_all_attributes(stack, 'active')
		list_of_particles = []
		for im in xrange(len(active)):
			if active[im]:  list_of_particles.append(im)
		del active
		nima = len(list_of_particles)
	else:
		nima = 0
	total_nima = bcast_number_to_all(nima, source_node = main_node)

	if myid != main_node:
		list_of_particles = [-1]*total_nima
	list_of_particles = bcast_list_to_all(list_of_particles, source_node = main_node)

	image_start, image_end = MPI_start_end(total_nima, number_of_proc, myid)

	# create a list of images for each node
	list_of_particles = list_of_particles[image_start: image_end]
	nima = len(list_of_particles)
	if debug:
		finfo.write("image_start, image_end: %d %d\n" %(image_start, image_end))
		finfo.flush()

	data = EMData.read_images(stack, list_of_particles)

	t_zero = Transform({"type":"spider","phi":0,"theta":0,"psi":0,"tx":0,"ty":0})
	transmulti = [[t_zero for i in xrange(nrefs)] for j in xrange(nima)]

	for iref,im in ((iref,im) for iref in xrange(nrefs) for im in xrange(nima)):
		if nrefs == 1:
			transmulti[im][iref] = data[im].get_attr("xform.projection")
		else:
			# if multi models, keep track of eulers for all models
			try:
				transmulti[im][iref] = data[im].get_attr("eulers_txty.%i"%iref)
			except:
				data[im].set_attr("eulers_txty.%i"%iref,t_zero)

	scoremulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)] 
	pixelmulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)] 
	ref_res = [0.0 for x in xrange(nrefs)] 
	apix = data[0].get_attr('apix_x')

	# for oplane parameter, create cylindrical mask
	if oplane is not None and myid == main_node:
		from hfunctions import createCylMask
		cmaskf=os.path.join(outdir, "mask3D_cyl.mrc")
		mask3D = createCylMask(data,ou,lmask,ilmask,cmaskf)
		# if finding seam of helix, create wedge masks
		if findseam is True:
			wedgemask=[]
			for pf in xrange(nrefs):
				wedgemask.append(EMData())
			# wedgemask option
			if wcmask is not None:
				wcmask = get_input_from_string(wcmask)
				if len(wcmask) != 3:
					print_msg("Error: wcmask option requires 3 values: x y radius")
					sys.exit()

	# determine if particles have helix info:
	try:
		data[0].get_attr('h_angle')
		original_data = []
		boxmask = True
		from hfunctions import createBoxMask
	except:
		boxmask = False

	# prepare particles
	for im in xrange(nima):
		data[im].set_attr('ID', list_of_particles[im])
		data[im].set_attr('pix_score', int(0))
		if CTF:
			# only phaseflip particles, not full CTF correction
			ctf_params = data[im].get_attr("ctf")
			st = Util.infomask(data[im], mask2D, False)
			data[im] -= st[0]
			data[im] = filt_ctf(data[im], ctf_params, sign = -1, binary=1)
			data[im].set_attr('ctf_applied', 1)
		# for window mask:
		if boxmask is True:
			h_angle = data[im].get_attr("h_angle")
			original_data.append(data[im].copy())
			bmask = createBoxMask(nx,apix,ou,lmask,h_angle)
			data[im]*=bmask
			del bmask
	if debug:
		finfo.write( '%d loaded  \n' % nima )
		finfo.flush()
	if myid == main_node:
		# initialize data for the reference preparation function
		ref_data = [ mask3D, max(center,0), None, None, None, None ]
		# for method -1, switch off centering in user function

	from time import time	

	#  this is needed for gathering of pixel errors
	disps = []
	recvcount = []
	disps_score = []
	recvcount_score = []
	for im in xrange(number_of_proc):
		if( im == main_node ):  
			disps.append(0)
			disps_score.append(0)
		else:		  
			disps.append(disps[im-1] + recvcount[im-1])
			disps_score.append(disps_score[im-1] + recvcount_score[im-1])
		ib, ie = MPI_start_end(total_nima, number_of_proc, im)
		recvcount.append( ie - ib )
		recvcount_score.append((ie-ib)*nrefs)

	pixer = [0.0]*nima
	cs = [0.0]*3
	total_iter = 0
	volodd = EMData.read_images(ref_vol, xrange(nrefs))
	voleve = EMData.read_images(ref_vol, xrange(nrefs))

	if restart:
		# recreate initial volumes from alignments stored in header
		itout = "000_00"
		for iref in xrange(nrefs):
			if(nrefs == 1):
				modout = ""
			else:
				modout = "_model_%02d"%(iref)	
	
			if(sort): 
				group = iref
				for im in xrange(nima):
					imgroup = data[im].get_attr('group')
					if imgroup == iref:
						data[im].set_attr('xform.projection',transmulti[im][iref])
			else: 
				group = int(999) 
				for im in xrange(nima):
					data[im].set_attr('xform.projection',transmulti[im][iref])
			
			fscfile = os.path.join(outdir, "fsc_%s%s"%(itout,modout))

			vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(data, sym, fscmask, fscfile, myid, main_node, index = group, npad = recon_pad)

			if myid == main_node:
				if helicalrecon:
					from hfunctions import processHelicalVol

					vstep=None
					if vertstep is not None:
						vstep=(vdp[iref],vdphi[iref])
					print_msg("Old rise and twist for model %i     : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
					hvals=processHelicalVol(vol[iref],voleve[iref],volodd[iref],iref,outdir,itout,
								dp[iref],dphi[iref],apix,hsearch,findseam,vstep,wcmask)
					(vol[iref],voleve[iref],volodd[iref],dp[iref],dphi[iref],vdp[iref],vdphi[iref])=hvals
					print_msg("New rise and twist for model %i     : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
					# get new FSC from symmetrized half volumes
					fscc = fsc_mask( volodd[iref], voleve[iref], mask3D, rstep, fscfile)
				else:
					vol[iref].write_image(os.path.join(outdir, "vol_%s.hdf"%itout),-1)

				if save_half is True:
					volodd[iref].write_image(os.path.join(outdir, "volodd_%s.hdf"%itout),-1)
					voleve[iref].write_image(os.path.join(outdir, "voleve_%s.hdf"%itout),-1)

				if nmasks > 1:
					# Read mask for multiplying
					ref_data[0] = maskF[iref]
				ref_data[2] = vol[iref]
				ref_data[3] = fscc
				#  call user-supplied function to prepare reference image, i.e., center and filter it
				vol[iref], cs,fl = ref_ali3d(ref_data)
				vol[iref].write_image(os.path.join(outdir, "volf_%s.hdf"%(itout)),-1)
				if (apix == 1):
					res_msg = "Models filtered at spatial frequency of:\t"
					res = fl
				else:
					res_msg = "Models filtered at resolution of:       \t"
					res = apix / fl	
				ares = array2string(array(res), precision = 2)
				print_msg("%s%s\n\n"%(res_msg,ares))	
			
			bcast_EMData_to_all(vol[iref], myid, main_node)
			# write out headers, under MPI writing has to be done sequentially
			mpi_barrier(MPI_COMM_WORLD)

	# projection matching	
	for N_step in xrange(lstp):
		terminate = 0
		Iter = -1
 		while(Iter < max_iter-1 and terminate == 0):
			Iter += 1
			total_iter += 1
			itout = "%03g_%02d" %(delta[N_step], Iter)
			if myid == main_node:
				print_msg("ITERATION #%3d, inner iteration #%3d\nDelta = %4.1f, an = %5.2f, xrange = %5.2f, yrange = %5.2f, step = %5.2f\n\n"%(N_step, Iter, delta[N_step], an[N_step], xrng[N_step],yrng[N_step],step[N_step]))
	
			for iref in xrange(nrefs):
				if myid == main_node: start_time = time()
				volft,kb = prep_vol( vol[iref] )

				## constrain projections to out of plane parameter
				theta1 = None
				theta2 = None
				if oplane is not None:
					theta1 = 90-oplane
					theta2 = 90+oplane
				refrings = prepare_refrings( volft, kb, nx, delta[N_step], ref_a, sym, numr, MPI=True, phiEqpsi = "Minus", initial_theta=theta1, delta_theta=theta2)
				
				del volft,kb

				if myid== main_node:
					print_msg( "Time to prepare projections for model %i: %s\n" % (iref, legibleTime(time()-start_time)) )
					start_time = time()
	
				for im in xrange( nima ):
					data[im].set_attr("xform.projection", transmulti[im][iref])
					if an[N_step] == -1:
						t1, peak, pixer[im] = proj_ali_incore(data[im],refrings,numr,xrng[N_step],yrng[N_step],step[N_step],finfo)
					else:
						t1, peak, pixer[im] = proj_ali_incore_local(data[im],refrings,numr,xrng[N_step],yrng[N_step],step[N_step],an[N_step],finfo)
					#data[im].set_attr("xform.projection"%iref, t1)
					if nrefs > 1: data[im].set_attr("eulers_txty.%i"%iref,t1)
					scoremulti[im][iref] = peak
					from pixel_error import max_3D_pixel_error
					# t1 is the current param, t2 is old
					t2 = transmulti[im][iref]
					pixelmulti[im][iref] = max_3D_pixel_error(t1,t2,numr[-3])
					transmulti[im][iref] = t1

				if myid == main_node:
					print_msg("Time of alignment for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
					start_time = time()


			# gather scoring data from all processors
			from mpi import mpi_gatherv
			scoremultisend = sum(scoremulti,[])
			pixelmultisend = sum(pixelmulti,[])
			tmp = mpi_gatherv(scoremultisend,len(scoremultisend),MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node,MPI_COMM_WORLD)
			tmp1 = mpi_gatherv(pixelmultisend,len(pixelmultisend),MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node,MPI_COMM_WORLD)
			tmp = mpi_bcast(tmp,(total_nima * nrefs), MPI_FLOAT,0, MPI_COMM_WORLD)
			tmp1 = mpi_bcast(tmp1,(total_nima * nrefs), MPI_FLOAT,0, MPI_COMM_WORLD)
			tmp = map(float,tmp)
			tmp1 = map(float,tmp1)
			score = array(tmp).reshape(-1,nrefs)
			pixelerror = array(tmp1).reshape(-1,nrefs) 
			score_local = array(scoremulti)
			mean_score = score.mean(axis=0)
			std_score = score.std(axis=0)
			cut = mean_score - (cutoff * std_score)
			cut2 = mean_score + (cutoff * std_score)
			res_max = score_local.argmax(axis=1)
			minus_cc = [0.0 for x in xrange(nrefs)]
			minus_pix = [0.0 for x in xrange(nrefs)]
			minus_ref = [0.0 for x in xrange(nrefs)]
			
			#output pixel errors
			if(myid == main_node):
				from statistics import hist_list
				lhist = 20
				pixmin = pixelerror.min(axis=1)
				region, histo = hist_list(pixmin, lhist)
				if(region[0] < 0.0):  region[0] = 0.0
				print_msg("Histogram of pixel errors\n      ERROR       number of particles\n")
				for lhx in xrange(lhist):
					print_msg(" %10.3f     %7d\n"%(region[lhx], histo[lhx]))
				# Terminate if 95% within 1 pixel error
				im = 0
				for lhx in xrange(lhist):
					if(region[lhx] > 1.0): break
					im += histo[lhx]
				print_msg( "Percent of particles with pixel error < 1: %f\n\n"% (im/float(total_nima)*100))
				term_cond = float(term)/100
				if(im/float(total_nima) > term_cond): 
					terminate = 1
					print_msg("Terminating internal loop\n")
				del region, histo
			terminate = mpi_bcast(terminate, 1, MPI_INT, 0, MPI_COMM_WORLD)
			terminate = int(terminate[0])	
			
			for im in xrange(nima):
				if(sort==False):
					data[im].set_attr('group',999)
				elif (mjump[N_step]==1):
					data[im].set_attr('group',int(res_max[im]))
				
				pix_run = data[im].get_attr('pix_score')			
				if (pix_cutoff[N_step]==1 and (terminate==1 or Iter == max_iter-1)):
					if (pixelmulti[im][int(res_max[im])] > 1):
						data[im].set_attr('pix_score',int(777))

				if (score_local[im][int(res_max[im])]<cut[int(res_max[im])]) or (two_tail and score_local[im][int(res_max[im])]>cut2[int(res_max[im])]):
					data[im].set_attr('group',int(888))
					minus_cc[int(res_max[im])] = minus_cc[int(res_max[im])] + 1

				if(pix_run == 777):
					data[im].set_attr('group',int(777))
					minus_pix[int(res_max[im])] = minus_pix[int(res_max[im])] + 1

				if (compare_ref_free != "-1") and (ref_free_cutoff[N_step] != -1) and (total_iter > 1):
					id = data[im].get_attr('ID')
					if id in rejects:
						data[im].set_attr('group',int(666))
						minus_ref[int(res_max[im])] = minus_ref[int(res_max[im])] + 1	
						
				
			minus_cc_tot = mpi_reduce(minus_cc,nrefs,MPI_FLOAT,MPI_SUM,0,MPI_COMM_WORLD)	
			minus_pix_tot = mpi_reduce(minus_pix,nrefs,MPI_FLOAT,MPI_SUM,0,MPI_COMM_WORLD) 	
			minus_ref_tot = mpi_reduce(minus_ref,nrefs,MPI_FLOAT,MPI_SUM,0,MPI_COMM_WORLD)
			if (myid == main_node):
				if(sort):
					tot_max = score.argmax(axis=1)
					res = bincount(tot_max)
				else:
					res = ones(nrefs) * total_nima
				print_msg("Particle distribution:	     \t\t%s\n"%(res*1.0))
				afcut1 = res - minus_cc_tot
				afcut2 = afcut1 - minus_pix_tot
				afcut3 = afcut2 - minus_ref_tot
				print_msg("Particle distribution after cc cutoff:\t\t%s\n"%(afcut1))
				print_msg("Particle distribution after pix cutoff:\t\t%s\n"%(afcut2)) 
				print_msg("Particle distribution after ref cutoff:\t\t%s\n\n"%(afcut3)) 
					
						
			res = [0.0 for i in xrange(nrefs)]
			for iref in xrange(nrefs):
				if(center == -1):
					from utilities      import estimate_3D_center_MPI, rotate_3D_shift
					dummy=EMData()
					cs[0], cs[1], cs[2], dummy, dummy = estimate_3D_center_MPI(data, total_nima, myid, number_of_proc, main_node)				
					cs = mpi_bcast(cs, 3, MPI_FLOAT, main_node, MPI_COMM_WORLD)
					cs = [-float(cs[0]), -float(cs[1]), -float(cs[2])]
					rotate_3D_shift(data, cs)


				if(sort): 
					group = iref
					for im in xrange(nima):
						imgroup = data[im].get_attr('group')
						if imgroup == iref:
							data[im].set_attr('xform.projection',transmulti[im][iref])
				else: 
					group = int(999) 
					for im in xrange(nima):
						data[im].set_attr('xform.projection',transmulti[im][iref])
				if(nrefs == 1):
					modout = ""
				else:
					modout = "_model_%02d"%(iref)	
				
				fscfile = os.path.join(outdir, "fsc_%s%s"%(itout,modout))
				vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(data, sym, fscmask, fscfile, myid, main_node, index=group, npad=recon_pad)
	
				if myid == main_node:
					print_msg("3D reconstruction time for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
					start_time = time()
	
				# Compute Fourier variance
				if fourvar:
					outvar = os.path.join(outdir, "volVar_%s.hdf"%(itout))
					ssnr_file = os.path.join(outdir, "ssnr_%s"%(itout))
					varf = varf3d_MPI(data, ssnr_text_file=ssnr_file, mask2D=None, reference_structure=vol[iref], ou=last_ring, rw=1.0, npad=1, CTF=None, sign=1, sym=sym, myid=myid)
					if myid == main_node:
						print_msg("Time to calculate 3D Fourier variance for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
						start_time = time()
						varf = 1.0/varf
						varf.write_image(outvar,-1)
				else:  varf = None

				if myid == main_node:
					if helicalrecon:
						from hfunctions import processHelicalVol

						vstep=None
						if vertstep is not None:
							vstep=(vdp[iref],vdphi[iref])
						print_msg("Old rise and twist for model %i     : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
						hvals=processHelicalVol(vol[iref],voleve[iref],volodd[iref],iref,outdir,itout,
									dp[iref],dphi[iref],apix,hsearch,findseam,vstep,wcmask)
						(vol[iref],voleve[iref],volodd[iref],dp[iref],dphi[iref],vdp[iref],vdphi[iref])=hvals
						print_msg("New rise and twist for model %i     : %8.3f, %8.3f\n"%(iref,dp[iref],dphi[iref]))
						# get new FSC from symmetrized half volumes
						fscc = fsc_mask( volodd[iref], voleve[iref], mask3D, rstep, fscfile)

						print_msg("Time to search and apply helical symmetry for model %i: %s\n\n"%(iref, legibleTime(time()-start_time)))
						start_time = time()
					else:
						vol[iref].write_image(os.path.join(outdir, "vol_%s.hdf"%(itout)),-1)

					if save_half is True:
						volodd[iref].write_image(os.path.join(outdir, "volodd_%s.hdf"%(itout)),-1)
						voleve[iref].write_image(os.path.join(outdir, "voleve_%s.hdf"%(itout)),-1)

					if nmasks > 1:
						# Read mask for multiplying
						ref_data[0] = maskF[iref]
					ref_data[2] = vol[iref]
					ref_data[3] = fscc
					ref_data[4] = varf
					#  call user-supplied function to prepare reference image, i.e., center and filter it
					vol[iref], cs,fl = ref_ali3d(ref_data)
					vol[iref].write_image(os.path.join(outdir, "volf_%s.hdf"%(itout)),-1)
					if (apix == 1):
						res_msg = "Models filtered at spatial frequency of:\t"
						res[iref] = fl
					else:
						res_msg = "Models filtered at resolution of:       \t"
						res[iref] = apix / fl	
	
				del varf
				bcast_EMData_to_all(vol[iref], myid, main_node)
				
				if compare_ref_free != "-1": compare_repro = True
				if compare_repro:
					outfile_repro = comp_rep(refrings, data, itout, modout, vol[iref], group, nima, nx, myid, main_node, outdir)
					mpi_barrier(MPI_COMM_WORLD)
					if compare_ref_free != "-1":
						ref_free_output = os.path.join(outdir,"ref_free_%s%s"%(itout,modout))
						rejects = compare(compare_ref_free, outfile_repro,ref_free_output,yrng[N_step], xrng[N_step], rstep,nx,apix,ref_free_cutoff[N_step], number_of_proc, myid, main_node)

			# retrieve alignment params from all processors
			par_str = ['xform.projection','ID','group']
			if nrefs > 1:
				for iref in xrange(nrefs):
					par_str.append('eulers_txty.%i'%iref)

			if myid == main_node:
				from utilities import recv_attr_dict
				recv_attr_dict(main_node, stack, data, par_str, image_start, image_end, number_of_proc)
				
			else:	send_attr_dict(main_node, data, par_str, image_start, image_end)

			if myid == main_node:
				ares = array2string(array(res), precision = 2)
				print_msg("%s%s\n\n"%(res_msg,ares))
				dummy = EMData()
				if full_output:
					nimat = EMUtil.get_image_count(stack)
					output_file = os.path.join(outdir, "paramout_%s"%itout)
					foutput = open(output_file, 'w')
					for im in xrange(nimat):
						# save the parameters for each of the models
						outstring = ""
						dummy.read_image(stack,im,True)
						param3d = dummy.get_attr('xform.projection')
						g = dummy.get_attr("group")
						# retrieve alignments in EMAN-format
						pE = param3d.get_params('eman')
						outstring += "%f\t%f\t%f\t%f\t%f\t%i\n" %(pE["az"], pE["alt"], pE["phi"], pE["tx"], pE["ty"],g)
						foutput.write(outstring)
					foutput.close()
				del dummy
			mpi_barrier(MPI_COMM_WORLD)


#	mpi_finalize()	

	if myid == main_node: print_end_msg("ali3d_MPI")
Example #2
0
def main():
    progname = os.path.basename(sys.argv[0])
    usage = progname + """  input_micrograph_list_file  input_micrograph_pattern  input_coordinates_pattern  output_directory  --coordinates_format  --box_size=box_size  --invert  --import_ctf=ctf_file  --limit_ctf  --resample_ratio=resample_ratio  --defocus_error=defocus_error  --astigmatism_error=astigmatism_error
	
Window particles from micrographs in input list file. The coordinates of the particles should be given as input.
Please specify name pattern of input micrographs and coordinates files with a wild card (*). Use the wild card to indicate the place of micrograph ID (e.g. serial number, time stamp, and etc). 
The name patterns must be enclosed by single quotes (') or double quotes ("). (Note: sxgui.py automatically adds single quotes (')). 
BDB files can not be selected as input micrographs.
	
	sxwindow.py  mic_list.txt  ./mic*.hdf  info/mic*_info.json  particles  --coordinates_format=eman2  --box_size=64  --invert  --import_ctf=outdir_cter/partres/partres.txt
	
If micrograph list file name is not provided, all files matched with the micrograph name pattern will be processed.
	
	sxwindow.py  ./mic*.hdf  info/mic*_info.json  particles  --coordinates_format=eman2  --box_size=64  --invert  --import_ctf=outdir_cter/partres/partres.txt
	
"""
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option(
        "--coordinates_format",
        type="string",
        default="eman1",
        help=
        "format of input coordinates files: 'sparx', 'eman1', 'eman2', or 'spider'. the coordinates of sparx, eman2, and spider format is particle center. the coordinates of eman1 format is particle box conner associated with the original box size. (default eman1)"
    )
    parser.add_option(
        "--box_size",
        type="int",
        default=256,
        help=
        "x and y dimension of square area to be windowed (in pixels): pixel size after resampling is assumed when resample_ratio < 1.0 (default 256)"
    )
    parser.add_option(
        "--invert",
        action="store_true",
        default=False,
        help="invert image contrast: recommended for cryo data (default False)"
    )
    parser.add_option(
        "--import_ctf",
        type="string",
        default="",
        help="file name of sxcter output: normally partres.txt (default none)")
    parser.add_option(
        "--limit_ctf",
        action="store_true",
        default=False,
        help=
        "filter micrographs based on the CTF limit: this option requires --import_ctf. (default False)"
    )
    parser.add_option(
        "--resample_ratio",
        type="float",
        default=1.0,
        help=
        "ratio of new to old image size (or old to new pixel size) for resampling: Valid range is 0.0 < resample_ratio <= 1.0. (default 1.0)"
    )
    parser.add_option(
        "--defocus_error",
        type="float",
        default=1000000.0,
        help=
        "defocus errror limit: exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocus_error percent. the error is computed as (std dev defocus)/defocus*100%. (default 1000000.0)"
    )
    parser.add_option(
        "--astigmatism_error",
        type="float",
        default=360.0,
        help=
        "astigmatism error limit: Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatism_error degrees. (default 360.0)"
    )

    ### detect if program is running under MPI
    RUNNING_UNDER_MPI = "OMPI_COMM_WORLD_SIZE" in os.environ

    main_node = 0

    if RUNNING_UNDER_MPI:
        from mpi import mpi_init
        from mpi import MPI_COMM_WORLD, mpi_comm_rank, mpi_comm_size, mpi_barrier, mpi_reduce, MPI_INT, MPI_SUM

        mpi_init(0, [])
        myid = mpi_comm_rank(MPI_COMM_WORLD)
        number_of_processes = mpi_comm_size(MPI_COMM_WORLD)
    else:
        number_of_processes = 1
        myid = 0

    (options, args) = parser.parse_args(sys.argv[1:])

    mic_list_file_path = None
    mic_pattern = None
    coords_pattern = None
    error_status = None
    while True:
        if len(args) < 3 or len(args) > 4:
            error_status = (
                "Please check usage for number of arguments.\n Usage: " +
                usage + "\n" + "Please run %s -h for help." % (progname),
                getframeinfo(currentframe()))
            break

        if len(args) == 3:
            mic_pattern = args[0]
            coords_pattern = args[1]
            out_dir = args[2]
        else:  # assert(len(args) == 4)
            mic_list_file_path = args[0]
            mic_pattern = args[1]
            coords_pattern = args[2]
            out_dir = args[3]

        if mic_list_file_path != None:
            if os.path.splitext(mic_list_file_path)[1] != ".txt":
                error_status = (
                    "Extension of input micrograph list file must be \".txt\". Please check input_micrograph_list_file argument. Run %s -h for help."
                    % (progname), getframeinfo(currentframe()))
                break

        if mic_pattern[:len("bdb:")].lower() == "bdb":
            error_status = (
                "BDB file can not be selected as input micrographs. Please convert the format, and restart the program. Run %s -h for help."
                % (progname), getframeinfo(currentframe()))
            break

        if mic_pattern.find("*") == -1:
            error_status = (
                "Input micrograph file name pattern must contain wild card (*). Please check input_micrograph_pattern argument. Run %s -h for help."
                % (progname), getframeinfo(currentframe()))
            break

        if coords_pattern.find("*") == -1:
            error_status = (
                "Input coordinates file name pattern must contain wild card (*). Please check input_coordinates_pattern argument. Run %s -h for help."
                % (progname), getframeinfo(currentframe()))
            break

        if myid == main_node:
            if os.path.exists(out_dir):
                error_status = (
                    "Output directory exists. Please change the name and restart the program.",
                    getframeinfo(currentframe()))
                break

        break
    if_error_then_all_processes_exit_program(error_status)

    # Check invalid conditions of options
    check_options(options, progname)

    mic_name_list = None
    error_status = None
    if myid == main_node:
        if mic_list_file_path != None:
            print("Loading micrograph list from %s file ..." %
                  (mic_list_file_path))
            mic_name_list = read_text_file(mic_list_file_path)
            if len(mic_name_list) == 0:
                print("Directory of first micrograph entry is " %
                      (os.path.dirname(mic_name_list[0])))
        else:  # assert (mic_list_file_path == None)
            print("Generating micrograph list in %s directory..." %
                  (os.path.dirname(mic_pattern)))
            mic_name_list = glob.glob(mic_pattern)
        if len(mic_name_list) == 0:
            error_status = (
                "No micrograph file is found. Please check input_micrograph_pattern and/or input_micrograph_list_file argument. Run %s -h for help."
                % (progname), getframeinfo(currentframe()))
        else:
            print("Found %d microgarphs" % len(mic_name_list))

    if_error_then_all_processes_exit_program(error_status)
    if RUNNING_UNDER_MPI:
        mic_name_list = wrap_mpi_bcast(mic_name_list, main_node)

    coords_name_list = None
    error_status = None
    if myid == main_node:
        coords_name_list = glob.glob(coords_pattern)
        if len(coords_name_list) == 0:
            error_status = (
                "No coordinates file is found. Please check input_coordinates_pattern argument. Run %s -h for help."
                % (progname), getframeinfo(currentframe()))
    if_error_then_all_processes_exit_program(error_status)
    if RUNNING_UNDER_MPI:
        coords_name_list = wrap_mpi_bcast(coords_name_list, main_node)

##################################################################################################################################################################################################################
##################################################################################################################################################################################################################
##################################################################################################################################################################################################################

# all processes must have access to indices
    if options.import_ctf:
        i_enum = -1
        i_enum += 1
        idx_cter_def = i_enum  # defocus [um]; index must be same as ctf object format
        i_enum += 1
        idx_cter_cs = i_enum  # Cs [mm]; index must be same as ctf object format
        i_enum += 1
        idx_cter_vol = i_enum  # voltage[kV]; index must be same as ctf object format
        i_enum += 1
        idx_cter_apix = i_enum  # pixel size [A]; index must be same as ctf object format
        i_enum += 1
        idx_cter_bfactor = i_enum  # B-factor [A^2]; index must be same as ctf object format
        i_enum += 1
        idx_cter_ac = i_enum  # amplitude contrast [%]; index must be same as ctf object format
        i_enum += 1
        idx_cter_astig_amp = i_enum  # astigmatism amplitude [um]; index must be same as ctf object format
        i_enum += 1
        idx_cter_astig_ang = i_enum  # astigmatism angle [degree]; index must be same as ctf object format
        i_enum += 1
        idx_cter_sd_def = i_enum  # std dev of defocus [um]
        i_enum += 1
        idx_cter_sd_astig_amp = i_enum  # std dev of ast amp [A]
        i_enum += 1
        idx_cter_sd_astig_ang = i_enum  # std dev of ast angle [degree]
        i_enum += 1
        idx_cter_cv_def = i_enum  # coefficient of variation of defocus [%]
        i_enum += 1
        idx_cter_cv_astig_amp = i_enum  # coefficient of variation of ast amp [%]
        i_enum += 1
        idx_cter_spectra_diff = i_enum  # average of differences between with- and without-astig. experimental 1D spectra at extrema
        i_enum += 1
        idx_cter_error_def = i_enum  # frequency at which signal drops by 50% due to estimated error of defocus alone [1/A]
        i_enum += 1
        idx_cter_error_astig = i_enum  # frequency at which signal drops by 50% due to estimated error of defocus and astigmatism [1/A]
        i_enum += 1
        idx_cter_error_ctf = i_enum  # limit frequency by CTF error [1/A]
        i_enum += 1
        idx_cter_mic_name = i_enum  # micrograph name
        i_enum += 1
        n_idx_cter = i_enum

    # Prepare loop variables
    mic_basename_pattern = os.path.basename(
        mic_pattern)  # file pattern without path
    mic_baseroot_pattern = os.path.splitext(mic_basename_pattern)[
        0]  # file pattern without path and extension
    coords_format = options.coordinates_format.lower()
    box_size = options.box_size
    box_half = box_size // 2
    mask2d = model_circle(
        box_size // 2, box_size, box_size
    )  # Create circular 2D mask to Util.infomask of particle images
    resample_ratio = options.resample_ratio

    n_mic_process = 0
    n_mic_reject_no_coords = 0
    n_mic_reject_no_cter_entry = 0
    n_global_coords_detect = 0
    n_global_coords_process = 0
    n_global_coords_reject_out_of_boundary = 0

    serial_id_list = []
    error_status = None
    ## not a real while, an if with the opportunity to use break when errors need to be reported
    while myid == main_node:
        #
        # NOTE: 2016/05/24 Toshio Moriya
        # Now, ignores the path in mic_pattern and entries of mic_name_list to create serial ID
        # Only the basename (file name) in micrograph path must be match
        #
        # Create list of micrograph serial ID
        # Break micrograph name pattern into prefix and suffix to find the head index of the micrograph serial id
        #
        mic_basename_tokens = mic_basename_pattern.split('*')
        # assert (len(mic_basename_tokens) == 2)
        serial_id_head_index = len(mic_basename_tokens[0])
        # Loop through micrograph names
        for mic_name in mic_name_list:
            # Find the tail index of the serial id and extract serial id from the micrograph name
            mic_basename = os.path.basename(mic_name)
            serial_id_tail_index = mic_basename.index(mic_basename_tokens[1])
            serial_id = mic_basename[serial_id_head_index:serial_id_tail_index]
            serial_id_list.append(serial_id)
        # assert (len(serial_id_list) == len(mic_name))
        del mic_name_list  # Do not need this anymore

        # Load CTFs if necessary
        if options.import_ctf:

            ctf_list = read_text_row(options.import_ctf)
            # print("Detected CTF entries : %6d ..." % (len(ctf_list)))

            if len(ctf_list) == 0:
                error_status = (
                    "No CTF entry is found in %s. Please check --import_ctf option. Run %s -h for help."
                    % (options.import_ctf, progname),
                    getframeinfo(currentframe()))
                break

            if (len(ctf_list[0]) != n_idx_cter):
                error_status = (
                    "Number of columns (%d) must be %d in %s. The format might be old. Please run sxcter.py again."
                    % (len(ctf_list[0]), n_idx_cter, options.import_ctf),
                    getframeinfo(currentframe()))
                break

            ctf_dict = {}
            n_reject_defocus_error = 0
            ctf_error_limit = [
                options.defocus_error / 100.0, options.astigmatism_error
            ]
            for ctf_params in ctf_list:
                assert (len(ctf_params) == n_idx_cter)
                # mic_baseroot is name of micrograph minus the path and extension
                mic_baseroot = os.path.splitext(
                    os.path.basename(ctf_params[idx_cter_mic_name]))[0]
                if (ctf_params[idx_cter_sd_def] / ctf_params[idx_cter_def] >
                        ctf_error_limit[0]):
                    print(
                        "Defocus error %f exceeds the threshold. Micrograph %s is rejected."
                        % (ctf_params[idx_cter_sd_def] /
                           ctf_params[idx_cter_def], mic_baseroot))
                    n_reject_defocus_error += 1
                else:
                    if (ctf_params[idx_cter_sd_astig_ang] >
                            ctf_error_limit[1]):
                        ctf_params[idx_cter_astig_amp] = 0.0
                        ctf_params[idx_cter_astig_ang] = 0.0
                    ctf_dict[mic_baseroot] = ctf_params
            del ctf_list  # Do not need this anymore

        break

    if_error_then_all_processes_exit_program(error_status)

    if options.import_ctf:
        if options.limit_ctf:
            cutoff_histogram = [
            ]  #@ming compute the histogram for micrographs cut of by ctf_params limit.

##################################################################################################################################################################################################################
##################################################################################################################################################################################################################
##################################################################################################################################################################################################################

    restricted_serial_id_list = []
    if myid == main_node:
        # Loop over serial IDs of micrographs
        for serial_id in serial_id_list:
            # mic_baseroot is name of micrograph minus the path and extension
            mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
            mic_name = mic_pattern.replace("*", serial_id)
            coords_name = coords_pattern.replace("*", serial_id)

            ########### # CHECKS: BEGIN
            if coords_name not in coords_name_list:
                print("    Cannot read %s. Skipping %s ..." %
                      (coords_name, mic_baseroot))
                n_mic_reject_no_coords += 1
                continue

            # IF mic is in CTER results
            if options.import_ctf:
                if mic_baseroot not in ctf_dict:
                    print(
                        "    Is not listed in CTER results. Skipping %s ..." %
                        (mic_baseroot))
                    n_mic_reject_no_cter_entry += 1
                    continue
                else:
                    ctf_params = ctf_dict[mic_baseroot]
            # CHECKS: END

            n_mic_process += 1

            restricted_serial_id_list.append(serial_id)
        # restricted_serial_id_list = restricted_serial_id_list[:128]  ## for testing against the nonMPI version

    if myid != main_node:
        if options.import_ctf:
            ctf_dict = None

    error_status = None
    if len(restricted_serial_id_list) < number_of_processes:
        error_status = (
            'Number of processes (%d) supplied by --np in mpirun cannot be greater than %d (number of micrographs that satisfy all criteria to be processed) '
            % (number_of_processes, len(restricted_serial_id_list)),
            getframeinfo(currentframe()))
    if_error_then_all_processes_exit_program(error_status)

    ## keep a copy of the original output directory where the final bdb will be created
    original_out_dir = out_dir
    if RUNNING_UNDER_MPI:
        mpi_barrier(MPI_COMM_WORLD)
        restricted_serial_id_list = wrap_mpi_bcast(restricted_serial_id_list,
                                                   main_node)
        mic_start, mic_end = MPI_start_end(len(restricted_serial_id_list),
                                           number_of_processes, myid)
        restricted_serial_id_list_not_sliced = restricted_serial_id_list
        restricted_serial_id_list = restricted_serial_id_list[
            mic_start:mic_end]

        if options.import_ctf:
            ctf_dict = wrap_mpi_bcast(ctf_dict, main_node)

        # generate subdirectories of out_dir, one for each process
        out_dir = os.path.join(out_dir, "%03d" % myid)

    if myid == main_node:
        print(
            "Micrographs processed by main process (including percent complete):"
        )

    len_processed_by_main_node_divided_by_100 = len(
        restricted_serial_id_list) / 100.0

    ##################################################################################################################################################################################################################
    ##################################################################################################################################################################################################################
    ##################################################################################################################################################################################################################
    #####  Starting main parallel execution

    for my_idx, serial_id in enumerate(restricted_serial_id_list):
        mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
        mic_name = mic_pattern.replace("*", serial_id)
        coords_name = coords_pattern.replace("*", serial_id)

        if myid == main_node:
            print(
                mic_name, " ---> % 2.2f%%" %
                (my_idx / len_processed_by_main_node_divided_by_100))
        mic_img = get_im(mic_name)

        # Read coordinates according to the specified format and
        # make the coordinates the center of particle image
        if coords_format == "sparx":
            coords_list = read_text_row(coords_name)
        elif coords_format == "eman1":
            coords_list = read_text_row(coords_name)
            for i in xrange(len(coords_list)):
                coords_list[i] = [(coords_list[i][0] + coords_list[i][2] // 2),
                                  (coords_list[i][1] + coords_list[i][3] // 2)]
        elif coords_format == "eman2":
            coords_list = js_open_dict(coords_name)["boxes"]
            for i in xrange(len(coords_list)):
                coords_list[i] = [coords_list[i][0], coords_list[i][1]]
        elif coords_format == "spider":
            coords_list = read_text_row(coords_name)
            for i in xrange(len(coords_list)):
                coords_list[i] = [coords_list[i][2], coords_list[i][3]]
            # else: assert (False) # Unreachable code

        # Calculate the new pixel size
        if options.import_ctf:
            ctf_params = ctf_dict[mic_baseroot]
            pixel_size_origin = ctf_params[idx_cter_apix]

            if resample_ratio < 1.0:
                # assert (resample_ratio > 0.0)
                new_pixel_size = pixel_size_origin / resample_ratio
                print(
                    "Resample micrograph to pixel size %6.4f and window segments from resampled micrograph."
                    % new_pixel_size)
            else:
                # assert (resample_ratio == 1.0)
                new_pixel_size = pixel_size_origin

            # Set ctf along with new pixel size in resampled micrograph
            ctf_params[idx_cter_apix] = new_pixel_size
        else:
            # assert (not options.import_ctf)
            if resample_ratio < 1.0:
                # assert (resample_ratio > 0.0)
                print(
                    "Resample micrograph with ratio %6.4f and window segments from resampled micrograph."
                    % resample_ratio)
            # else:
            #	assert (resample_ratio == 1.0)

        # Apply filters to micrograph
        fftip(mic_img)
        if options.limit_ctf:
            # assert (options.import_ctf)
            # Cut off frequency components higher than CTF limit
            q1, q2 = ctflimit(box_size, ctf_params[idx_cter_def],
                              ctf_params[idx_cter_cs],
                              ctf_params[idx_cter_vol], new_pixel_size)

            # This is absolute frequency of CTF limit in scale of original micrograph
            if resample_ratio < 1.0:
                # assert (resample_ratio > 0.0)
                q1 = resample_ratio * q1 / float(
                    box_size
                )  # q1 = (pixel_size_origin / new_pixel_size) * q1/float(box_size)
            else:
                # assert (resample_ratio == 1.0) -> pixel_size_origin == new_pixel_size -> pixel_size_origin / new_pixel_size == 1.0
                q1 = q1 / float(box_size)

            if q1 < 0.5:
                mic_img = filt_tanl(mic_img, q1, 0.01)
                cutoff_histogram.append(q1)

        # Cut off frequency components lower than the box size can express
        mic_img = fft(filt_gaussh(mic_img, resample_ratio / box_size))

        # Resample micrograph, map coordinates, and window segments from resampled micrograph using new coordinates
        # after resampling by resample_ratio, new pixel size will be pixel_size/resample_ratio = new_pixel_size
        # NOTE: 2015/04/13 Toshio Moriya
        # resample() efficiently takes care of the case resample_ratio = 1.0 but
        # it does not set apix_*. Even though it sets apix_* when resample_ratio < 1.0 ...
        mic_img = resample(mic_img, resample_ratio)

        if options.invert:
            mic_stats = Util.infomask(
                mic_img, None, True)  # mic_stat[0:mean, 1:SD, 2:min, 3:max]
            Util.mul_scalar(mic_img, -1.0)
            mic_img += 2 * mic_stats[0]

        if options.import_ctf:
            from utilities import generate_ctf
            ctf_obj = generate_ctf(
                ctf_params
            )  # indexes 0 to 7 (idx_cter_def to idx_cter_astig_ang) must be same in cter format & ctf object format.

        # Prepare loop variables
        nx = mic_img.get_xsize()
        ny = mic_img.get_ysize()
        x0 = nx // 2
        y0 = ny // 2

        n_coords_reject_out_of_boundary = 0
        local_stack_name = "bdb:%s#" % out_dir + mic_baseroot + '_ptcls'
        local_particle_id = 0  # can be different from coordinates_id
        # Loop over coordinates
        for coords_id in xrange(len(coords_list)):

            x = int(coords_list[coords_id][0])
            y = int(coords_list[coords_id][1])

            if resample_ratio < 1.0:
                # assert (resample_ratio > 0.0)
                x = int(x * resample_ratio)
                y = int(y * resample_ratio)
            # else:
            # 	assert(resample_ratio == 1.0)

            if ((0 <= x - box_half) and (x + box_half <= nx)
                    and (0 <= y - box_half) and (y + box_half <= ny)):
                particle_img = Util.window(mic_img, box_size, box_size, 1,
                                           x - x0, y - y0)
            else:
                print(
                    "In %s, coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ..."
                    % (mic_baseroot, coords_id, x, y, box_size))
                n_coords_reject_out_of_boundary += 1
                continue

            particle_img = ramp(particle_img)
            particle_stats = Util.infomask(
                particle_img, mask2d,
                False)  # particle_stats[0:mean, 1:SD, 2:min, 3:max]
            particle_img -= particle_stats[0]
            particle_img /= particle_stats[1]

            # NOTE: 2015/04/09 Toshio Moriya
            # ptcl_source_image might be redundant information ...
            # Consider re-organizing header entries...
            particle_img.set_attr("ptcl_source_image", mic_name)
            particle_img.set_attr("ptcl_source_coord_id", coords_id)
            particle_img.set_attr("ptcl_source_coord", [
                int(coords_list[coords_id][0]),
                int(coords_list[coords_id][1])
            ])
            particle_img.set_attr("resample_ratio", resample_ratio)

            # NOTE: 2015/04/13 Toshio Moriya
            # apix_* attributes are updated by resample() only when resample_ratio != 1.0
            # Let's make sure header info is consistent by setting apix_* = 1.0
            # regardless of options, so it is not passed down the processing line
            particle_img.set_attr("apix_x", 1.0)
            particle_img.set_attr("apix_y", 1.0)
            particle_img.set_attr("apix_z", 1.0)
            if options.import_ctf:
                particle_img.set_attr("ctf", ctf_obj)
                particle_img.set_attr("ctf_applied", 0)
                particle_img.set_attr("pixel_size_origin", pixel_size_origin)
                # particle_img.set_attr("apix_x", new_pixel_size)
                # particle_img.set_attr("apix_y", new_pixel_size)
                # particle_img.set_attr("apix_z", new_pixel_size)
            # NOTE: 2015/04/13 Toshio Moriya
            # Pawel Comment: Micrograph is not supposed to have CTF header info.
            # So, let's assume it does not exist & ignore its presence.
            # Note that resample() "correctly" updates pixel size of CTF header info if it exists
            # elif (particle_img.has_ctff()):
            # 	assert(not options.import_ctf)
            # 	ctf_origin = particle_img.get_attr("ctf_obj")
            # 	pixel_size_origin = round(ctf_origin.apix, 5) # Because SXCTER ouputs up to 5 digits
            # 	particle_img.set_attr("apix_x",pixel_size_origin)
            # 	particle_img.set_attr("apix_y",pixel_size_origin)
            # 	particle_img.set_attr("apix_z",pixel_size_origin)

            # print("local_stack_name, local_particle_id", local_stack_name, local_particle_id)
            particle_img.write_image(local_stack_name, local_particle_id)
            local_particle_id += 1

        n_global_coords_detect += len(coords_list)
        n_global_coords_process += local_particle_id
        n_global_coords_reject_out_of_boundary += n_coords_reject_out_of_boundary

        #		# MRK_DEBUG: Toshio Moriya 2016/05/03
        #		# Following codes are for debugging bdb. Delete in future
        #		result = db_check_dict(local_stack_name)
        #		print('# MRK_DEBUG: result = db_check_dict(local_stack_name): %s' % (result))
        #		result = db_list_dicts('bdb:%s' % out_dir)
        #		print('# MRK_DEBUG: result = db_list_dicts(out_dir): %s' % (result))
        #		result = db_get_image_info(local_stack_name)
        #		print('# MRK_DEBUG: result = db_get_image_info(local_stack_name)', result)

        # Release the data base of local stack from this process
        # so that the subprocess can access to the data base
        db_close_dict(local_stack_name)


#		# MRK_DEBUG: Toshio Moriya 2016/05/03
#		# Following codes are for debugging bdb. Delete in future
#		cmd_line = "e2iminfo.py %s" % (local_stack_name)
#		print('# MRK_DEBUG: Executing the command: %s' % (cmd_line))
#		cmdexecute(cmd_line)

#		# MRK_DEBUG: Toshio Moriya 2016/05/03
#		# Following codes are for debugging bdb. Delete in future
#		cmd_line = "e2iminfo.py bdb:%s#data" % (out_dir)
#		print('# MRK_DEBUG: Executing the command: %s' % (cmd_line))
#		cmdexecute(cmd_line)

    if RUNNING_UNDER_MPI:
        if options.import_ctf:
            if options.limit_ctf:
                cutoff_histogram = wrap_mpi_gatherv(cutoff_histogram,
                                                    main_node)

    if myid == main_node:
        if options.limit_ctf:
            # Print out the summary of CTF-limit filtering
            print(" ")
            print("Global summary of CTF-limit filtering (--limit_ctf) ...")
            print("Percentage of filtered micrographs: %8.2f\n" %
                  (len(cutoff_histogram) * 100.0 /
                   len(restricted_serial_id_list_not_sliced)))

            n_bins = 10
            if len(cutoff_histogram) >= n_bins:
                from statistics import hist_list
                cutoff_region, cutoff_counts = hist_list(
                    cutoff_histogram, n_bins)
                print("      Histogram of cut-off frequency")
                print("      cut-off       counts")
                for bin_id in xrange(n_bins):
                    print(" %14.7f     %7d" %
                          (cutoff_region[bin_id], cutoff_counts[bin_id]))
            else:
                print(
                    "The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced."
                    % (len(cutoff_histogram), n_bins))

    n_mic_process = mpi_reduce(n_mic_process, 1, MPI_INT, MPI_SUM, main_node,
                               MPI_COMM_WORLD)
    n_mic_reject_no_coords = mpi_reduce(n_mic_reject_no_coords, 1, MPI_INT,
                                        MPI_SUM, main_node, MPI_COMM_WORLD)
    n_mic_reject_no_cter_entry = mpi_reduce(n_mic_reject_no_cter_entry, 1,
                                            MPI_INT, MPI_SUM, main_node,
                                            MPI_COMM_WORLD)
    n_global_coords_detect = mpi_reduce(n_global_coords_detect, 1, MPI_INT,
                                        MPI_SUM, main_node, MPI_COMM_WORLD)
    n_global_coords_process = mpi_reduce(n_global_coords_process, 1, MPI_INT,
                                         MPI_SUM, main_node, MPI_COMM_WORLD)
    n_global_coords_reject_out_of_boundary = mpi_reduce(
        n_global_coords_reject_out_of_boundary, 1, MPI_INT, MPI_SUM, main_node,
        MPI_COMM_WORLD)

    # Print out the summary of all micrographs
    if main_node == myid:
        print(" ")
        print("Global summary of micrographs ...")
        print("Detected                        : %6d" %
              (len(restricted_serial_id_list_not_sliced)))
        print("Processed                       : %6d" % (n_mic_process))
        print("Rejected by no coordinates file : %6d" %
              (n_mic_reject_no_coords))
        print("Rejected by no CTER entry       : %6d" %
              (n_mic_reject_no_cter_entry))
        print(" ")
        print("Global summary of coordinates ...")
        print("Detected                        : %6d" %
              (n_global_coords_detect))
        print("Processed                       : %6d" %
              (n_global_coords_process))
        print("Rejected by out of boundary     : %6d" %
              (n_global_coords_reject_out_of_boundary))
        # print(" ")
        # print("DONE!!!")

    mpi_barrier(MPI_COMM_WORLD)

    if main_node == myid:

        import time
        time.sleep(1)
        print("\n Creating bdb:%s/data\n" % original_out_dir)
        for proc_i in range(number_of_processes):
            mic_start, mic_end = MPI_start_end(
                len(restricted_serial_id_list_not_sliced), number_of_processes,
                proc_i)
            for serial_id in restricted_serial_id_list_not_sliced[
                    mic_start:mic_end]:
                e2bdb_command = "e2bdb.py "
                mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
                if RUNNING_UNDER_MPI:
                    e2bdb_command += "bdb:" + os.path.join(
                        original_out_dir,
                        "%03d/" % proc_i) + mic_baseroot + "_ptcls "
                else:
                    e2bdb_command += "bdb:" + os.path.join(
                        original_out_dir, mic_baseroot + "_ptcls ")

                e2bdb_command += " --appendvstack=bdb:%s/data  1>/dev/null" % original_out_dir
                cmdexecute(e2bdb_command, printing_on_success=False)

        print("Done!\n")

    if RUNNING_UNDER_MPI:
        mpi_barrier(MPI_COMM_WORLD)
        from mpi import mpi_finalize
        mpi_finalize()

    sys.stdout.flush()
    sys.exit(0)
Example #3
0
def ali3d_MPI(stack,
              ref_vol,
              outdir,
              maskfile=None,
              ir=1,
              ou=-1,
              rs=1,
              xr="4 2 2 1",
              yr="-1",
              ts="1 1 0.5 0.25",
              delta="10 6 4 4",
              an="-1",
              center=0,
              maxit=5,
              term=95,
              CTF=False,
              fourvar=False,
              snr=1.0,
              ref_a="S",
              sym="c1",
              sort=True,
              cutoff=999.99,
              pix_cutoff="0",
              two_tail=False,
              model_jump="1 1 1 1 1",
              restart=False,
              save_half=False,
              protos=None,
              oplane=None,
              lmask=-1,
              ilmask=-1,
              findseam=False,
              vertstep=None,
              hpars="-1",
              hsearch="0.0 50.0",
              full_output=False,
              compare_repro=False,
              compare_ref_free="-1",
              ref_free_cutoff="-1 -1 -1 -1",
              wcmask=None,
              debug=False,
              recon_pad=4,
              olmask=75):

    from alignment import Numrinit, prepare_refrings
    from utilities import model_circle, get_image, drop_image, get_input_from_string
    from utilities import bcast_list_to_all, bcast_number_to_all, reduce_EMData_to_root, bcast_EMData_to_all
    from utilities import send_attr_dict
    from utilities import get_params_proj, file_type
    from fundamentals import rot_avg_image
    import os
    import types
    from utilities import print_begin_msg, print_end_msg, print_msg
    from mpi import mpi_bcast, mpi_comm_size, mpi_comm_rank, MPI_FLOAT, MPI_COMM_WORLD, mpi_barrier, mpi_reduce
    from mpi import mpi_reduce, MPI_INT, MPI_SUM, mpi_finalize
    from filter import filt_ctf
    from projection import prep_vol, prgs
    from statistics import hist_list, varf3d_MPI, fsc_mask
    from numpy import array, bincount, array2string, ones

    number_of_proc = mpi_comm_size(MPI_COMM_WORLD)
    myid = mpi_comm_rank(MPI_COMM_WORLD)
    main_node = 0
    if myid == main_node:
        if os.path.exists(outdir):
            ERROR(
                'Output directory exists, please change the name and restart the program',
                "ali3d_MPI", 1)
        os.mkdir(outdir)
    mpi_barrier(MPI_COMM_WORLD)

    if debug:
        from time import sleep
        while not os.path.exists(outdir):
            print "Node ", myid, "  waiting..."
            sleep(5)

        info_file = os.path.join(outdir, "progress%04d" % myid)
        finfo = open(info_file, 'w')
    else:
        finfo = None
    mjump = get_input_from_string(model_jump)
    xrng = get_input_from_string(xr)
    if yr == "-1": yrng = xrng
    else: yrng = get_input_from_string(yr)
    step = get_input_from_string(ts)
    delta = get_input_from_string(delta)
    ref_free_cutoff = get_input_from_string(ref_free_cutoff)
    pix_cutoff = get_input_from_string(pix_cutoff)

    lstp = min(len(xrng), len(yrng), len(step), len(delta))
    if an == "-1":
        an = [-1] * lstp
    else:
        an = get_input_from_string(an)
    # make sure pix_cutoff is set for all iterations
    if len(pix_cutoff) < lstp:
        for i in xrange(len(pix_cutoff), lstp):
            pix_cutoff.append(pix_cutoff[-1])
    # don't waste time on sub-pixel alignment for low-resolution ang incr
    for i in range(len(step)):
        if (delta[i] > 4 or delta[i] == -1) and step[i] < 1:
            step[i] = 1

    first_ring = int(ir)
    rstep = int(rs)
    last_ring = int(ou)
    max_iter = int(maxit)
    center = int(center)

    nrefs = EMUtil.get_image_count(ref_vol)
    nmasks = 0
    if maskfile:
        # read number of masks within each maskfile (mc)
        nmasks = EMUtil.get_image_count(maskfile)
        # open masks within maskfile (mc)
        maskF = EMData.read_images(maskfile, xrange(nmasks))
    vol = EMData.read_images(ref_vol, xrange(nrefs))
    nx = vol[0].get_xsize()

    ## make sure box sizes are the same
    if myid == main_node:
        im = EMData.read_images(stack, [0])
        bx = im[0].get_xsize()
        if bx != nx:
            print_msg(
                "Error: Stack box size (%i) differs from initial model (%i)\n"
                % (bx, nx))
            sys.exit()
        del im, bx

    # for helical processing:
    helicalrecon = False
    if protos is not None or hpars != "-1" or findseam is True:
        helicalrecon = True
        # if no out-of-plane param set, use 5 degrees
        if oplane is None:
            oplane = 5.0
    if protos is not None:
        proto = get_input_from_string(protos)
        if len(proto) != nrefs:
            print_msg("Error: insufficient protofilament numbers supplied")
            sys.exit()
    if hpars != "-1":
        hpars = get_input_from_string(hpars)
        if len(hpars) != 2 * nrefs:
            print_msg("Error: insufficient helical parameters supplied")
            sys.exit()
    ## create helical parameter file for helical reconstruction
    if helicalrecon is True and myid == main_node:
        from hfunctions import createHpar
        # create initial helical parameter files
        dp = [0] * nrefs
        dphi = [0] * nrefs
        vdp = [0] * nrefs
        vdphi = [0] * nrefs
        for iref in xrange(nrefs):
            hpar = os.path.join(outdir, "hpar%02d.spi" % (iref))
            params = False
            if hpars != "-1":
                # if helical parameters explicitly given, set twist & rise
                params = [float(hpars[iref * 2]), float(hpars[(iref * 2) + 1])]
            dp[iref], dphi[iref], vdp[iref], vdphi[iref] = createHpar(
                hpar, proto[iref], params, vertstep)

    # get values for helical search parameters
    hsearch = get_input_from_string(hsearch)
    if len(hsearch) != 2:
        print_msg("Error: specify outer and inner radii for helical search")
        sys.exit()

    if last_ring < 0 or last_ring > int(nx / 2) - 2:
        last_ring = int(nx / 2) - 2

    if myid == main_node:
        #	import user_functions
        #	user_func = user_functions.factory[user_func_name]

        print_begin_msg("ali3d_MPI")
        print_msg("Input stack		 : %s\n" % (stack))
        print_msg("Reference volume	    : %s\n" % (ref_vol))
        print_msg("Output directory	    : %s\n" % (outdir))
        if nmasks > 0:
            print_msg("Maskfile (number of masks)  : %s (%i)\n" %
                      (maskfile, nmasks))
        print_msg("Inner radius		: %i\n" % (first_ring))
        print_msg("Outer radius		: %i\n" % (last_ring))
        print_msg("Ring step		   : %i\n" % (rstep))
        print_msg("X search range	      : %s\n" % (xrng))
        print_msg("Y search range	      : %s\n" % (yrng))
        print_msg("Translational step	  : %s\n" % (step))
        print_msg("Angular step		: %s\n" % (delta))
        print_msg("Angular search range	: %s\n" % (an))
        print_msg("Maximum iteration	   : %i\n" % (max_iter))
        print_msg("Center type		 : %i\n" % (center))
        print_msg("CTF correction	      : %s\n" % (CTF))
        print_msg("Signal-to-Noise Ratio       : %f\n" % (snr))
        print_msg("Reference projection method : %s\n" % (ref_a))
        print_msg("Symmetry group	      : %s\n" % (sym))
        print_msg("Fourier padding for 3D      : %i\n" % (recon_pad))
        print_msg("Number of reference models  : %i\n" % (nrefs))
        print_msg("Sort images between models  : %s\n" % (sort))
        print_msg("Allow images to jump	: %s\n" % (mjump))
        print_msg("CC cutoff standard dev      : %f\n" % (cutoff))
        print_msg("Two tail cutoff	     : %s\n" % (two_tail))
        print_msg("Termination pix error       : %f\n" % (term))
        print_msg("Pixel error cutoff	  : %s\n" % (pix_cutoff))
        print_msg("Restart		     : %s\n" % (restart))
        print_msg("Full output		 : %s\n" % (full_output))
        print_msg("Compare reprojections       : %s\n" % (compare_repro))
        print_msg("Compare ref free class avgs : %s\n" % (compare_ref_free))
        print_msg("Use cutoff from ref free    : %s\n" % (ref_free_cutoff))
        if protos:
            print_msg("Protofilament numbers	: %s\n" % (proto))
            print_msg("Using helical search range   : %s\n" % hsearch)
        if findseam is True:
            print_msg("Using seam-based reconstruction\n")
        if hpars != "-1":
            print_msg("Using hpars		  : %s\n" % hpars)
        if vertstep != None:
            print_msg("Using vertical step    : %.2f\n" % vertstep)
        if save_half is True:
            print_msg("Saving even/odd halves\n")
        for i in xrange(100):
            print_msg("*")
        print_msg("\n\n")
    if maskfile:
        if type(maskfile) is types.StringType: mask3D = get_image(maskfile)
        else: mask3D = maskfile
    else: mask3D = model_circle(last_ring, nx, nx, nx)

    numr = Numrinit(first_ring, last_ring, rstep, "F")
    mask2D = model_circle(last_ring, nx, nx) - model_circle(first_ring, nx, nx)

    fscmask = model_circle(last_ring, nx, nx, nx)
    if CTF:
        from filter import filt_ctf
    from reconstruction_rjh import rec3D_MPI_noCTF

    if myid == main_node:
        active = EMUtil.get_all_attributes(stack, 'active')
        list_of_particles = []
        for im in xrange(len(active)):
            if active[im]: list_of_particles.append(im)
        del active
        nima = len(list_of_particles)
    else:
        nima = 0
    total_nima = bcast_number_to_all(nima, source_node=main_node)

    if myid != main_node:
        list_of_particles = [-1] * total_nima
    list_of_particles = bcast_list_to_all(list_of_particles,
                                          source_node=main_node)

    image_start, image_end = MPI_start_end(total_nima, number_of_proc, myid)

    # create a list of images for each node
    list_of_particles = list_of_particles[image_start:image_end]
    nima = len(list_of_particles)
    if debug:
        finfo.write("image_start, image_end: %d %d\n" %
                    (image_start, image_end))
        finfo.flush()

    data = EMData.read_images(stack, list_of_particles)

    t_zero = Transform({
        "type": "spider",
        "phi": 0,
        "theta": 0,
        "psi": 0,
        "tx": 0,
        "ty": 0
    })
    transmulti = [[t_zero for i in xrange(nrefs)] for j in xrange(nima)]

    for iref, im in ((iref, im) for iref in xrange(nrefs)
                     for im in xrange(nima)):
        if nrefs == 1:
            transmulti[im][iref] = data[im].get_attr("xform.projection")
        else:
            # if multi models, keep track of eulers for all models
            try:
                transmulti[im][iref] = data[im].get_attr("eulers_txty.%i" %
                                                         iref)
            except:
                data[im].set_attr("eulers_txty.%i" % iref, t_zero)

    scoremulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)]
    pixelmulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)]
    ref_res = [0.0 for x in xrange(nrefs)]
    apix = data[0].get_attr('apix_x')

    # for oplane parameter, create cylindrical mask
    if oplane is not None and myid == main_node:
        from hfunctions import createCylMask
        cmaskf = os.path.join(outdir, "mask3D_cyl.mrc")
        mask3D = createCylMask(data, olmask, lmask, ilmask, cmaskf)
        # if finding seam of helix, create wedge masks
        if findseam is True:
            wedgemask = []
            for pf in xrange(nrefs):
                wedgemask.append(EMData())
            # wedgemask option
            if wcmask is not None:
                wcmask = get_input_from_string(wcmask)
                if len(wcmask) != 3:
                    print_msg(
                        "Error: wcmask option requires 3 values: x y radius")
                    sys.exit()

    # determine if particles have helix info:
    try:
        data[0].get_attr('h_angle')
        original_data = []
        boxmask = True
        from hfunctions import createBoxMask
    except:
        boxmask = False

    # prepare particles
    for im in xrange(nima):
        data[im].set_attr('ID', list_of_particles[im])
        data[im].set_attr('pix_score', int(0))
        if CTF:
            # only phaseflip particles, not full CTF correction
            ctf_params = data[im].get_attr("ctf")
            st = Util.infomask(data[im], mask2D, False)
            data[im] -= st[0]
            data[im] = filt_ctf(data[im], ctf_params, sign=-1, binary=1)
            data[im].set_attr('ctf_applied', 1)
        # for window mask:
        if boxmask is True:
            h_angle = data[im].get_attr("h_angle")
            original_data.append(data[im].copy())
            bmask = createBoxMask(nx, apix, ou, lmask, h_angle)
            data[im] *= bmask
            del bmask
    if debug:
        finfo.write('%d loaded  \n' % nima)
        finfo.flush()
    if myid == main_node:
        # initialize data for the reference preparation function
        ref_data = [mask3D, max(center, 0), None, None, None, None]
        # for method -1, switch off centering in user function

    from time import time

    #  this is needed for gathering of pixel errors
    disps = []
    recvcount = []
    disps_score = []
    recvcount_score = []
    for im in xrange(number_of_proc):
        if (im == main_node):
            disps.append(0)
            disps_score.append(0)
        else:
            disps.append(disps[im - 1] + recvcount[im - 1])
            disps_score.append(disps_score[im - 1] + recvcount_score[im - 1])
        ib, ie = MPI_start_end(total_nima, number_of_proc, im)
        recvcount.append(ie - ib)
        recvcount_score.append((ie - ib) * nrefs)

    pixer = [0.0] * nima
    cs = [0.0] * 3
    total_iter = 0
    volodd = EMData.read_images(ref_vol, xrange(nrefs))
    voleve = EMData.read_images(ref_vol, xrange(nrefs))

    if restart:
        # recreate initial volumes from alignments stored in header
        itout = "000_00"
        for iref in xrange(nrefs):
            if (nrefs == 1):
                modout = ""
            else:
                modout = "_model_%02d" % (iref)

            if (sort):
                group = iref
                for im in xrange(nima):
                    imgroup = data[im].get_attr('group')
                    if imgroup == iref:
                        data[im].set_attr('xform.projection',
                                          transmulti[im][iref])
            else:
                group = int(999)
                for im in xrange(nima):
                    data[im].set_attr('xform.projection', transmulti[im][iref])

            fscfile = os.path.join(outdir, "fsc_%s%s" % (itout, modout))

            vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(
                data,
                sym,
                fscmask,
                fscfile,
                myid,
                main_node,
                index=group,
                npad=recon_pad)

            if myid == main_node:
                if helicalrecon:
                    from hfunctions import processHelicalVol
                    vstep = None
                    if vertstep is not None:
                        vstep = (vdp[iref], vdphi[iref])
                    print_msg(
                        "Old rise and twist for model %i     : %8.3f, %8.3f\n"
                        % (iref, dp[iref], dphi[iref]))
                    hvals = processHelicalVol(vol[iref], voleve[iref],
                                              volodd[iref], iref, outdir,
                                              itout, dp[iref], dphi[iref],
                                              apix, hsearch, findseam, vstep,
                                              wcmask)
                    (vol[iref], voleve[iref], volodd[iref], dp[iref],
                     dphi[iref], vdp[iref], vdphi[iref]) = hvals
                    print_msg(
                        "New rise and twist for model %i     : %8.3f, %8.3f\n"
                        % (iref, dp[iref], dphi[iref]))
                    # get new FSC from symmetrized half volumes
                    fscc = fsc_mask(volodd[iref], voleve[iref], mask3D, rstep,
                                    fscfile)
                else:
                    vol[iref].write_image(
                        os.path.join(outdir, "vol_%s.hdf" % itout), -1)

                if save_half is True:
                    volodd[iref].write_image(
                        os.path.join(outdir, "volodd_%s.hdf" % itout), -1)
                    voleve[iref].write_image(
                        os.path.join(outdir, "voleve_%s.hdf" % itout), -1)

                if nmasks > 1:
                    # Read mask for multiplying
                    ref_data[0] = maskF[iref]
                ref_data[2] = vol[iref]
                ref_data[3] = fscc
                #  call user-supplied function to prepare reference image, i.e., center and filter it
                vol[iref], cs, fl = ref_ali3d(ref_data)
                vol[iref].write_image(
                    os.path.join(outdir, "volf_%s.hdf" % (itout)), -1)
                if (apix == 1):
                    res_msg = "Models filtered at spatial frequency of:\t"
                    res = fl
                else:
                    res_msg = "Models filtered at resolution of:       \t"
                    res = apix / fl
                ares = array2string(array(res), precision=2)
                print_msg("%s%s\n\n" % (res_msg, ares))

            bcast_EMData_to_all(vol[iref], myid, main_node)
            # write out headers, under MPI writing has to be done sequentially
            mpi_barrier(MPI_COMM_WORLD)

    # projection matching
    for N_step in xrange(lstp):
        terminate = 0
        Iter = -1
        while (Iter < max_iter - 1 and terminate == 0):
            Iter += 1
            total_iter += 1
            itout = "%03g_%02d" % (delta[N_step], Iter)
            if myid == main_node:
                print_msg(
                    "ITERATION #%3d, inner iteration #%3d\nDelta = %4.1f, an = %5.2f, xrange = %5.2f, yrange = %5.2f, step = %5.2f\n\n"
                    % (N_step, Iter, delta[N_step], an[N_step], xrng[N_step],
                       yrng[N_step], step[N_step]))

            for iref in xrange(nrefs):
                if myid == main_node: start_time = time()
                volft, kb = prep_vol(vol[iref])

                ## constrain projections to out of plane parameter
                theta1 = None
                theta2 = None
                if oplane is not None:
                    theta1 = 90 - oplane
                    theta2 = 90 + oplane
                refrings = prepare_refrings(volft,
                                            kb,
                                            nx,
                                            delta[N_step],
                                            ref_a,
                                            sym,
                                            numr,
                                            MPI=True,
                                            phiEqpsi="Minus",
                                            initial_theta=theta1,
                                            delta_theta=theta2)

                del volft, kb

                if myid == main_node:
                    print_msg(
                        "Time to prepare projections for model %i: %s\n" %
                        (iref, legibleTime(time() - start_time)))
                    start_time = time()

                for im in xrange(nima):
                    data[im].set_attr("xform.projection", transmulti[im][iref])
                    if an[N_step] == -1:
                        t1, peak, pixer[im] = proj_ali_incore(
                            data[im], refrings, numr, xrng[N_step],
                            yrng[N_step], step[N_step], finfo)
                    else:
                        t1, peak, pixer[im] = proj_ali_incore_local(
                            data[im], refrings, numr, xrng[N_step],
                            yrng[N_step], step[N_step], an[N_step], finfo)
                    #data[im].set_attr("xform.projection"%iref, t1)
                    if nrefs > 1:
                        data[im].set_attr("eulers_txty.%i" % iref, t1)
                    scoremulti[im][iref] = peak
                    from pixel_error import max_3D_pixel_error
                    # t1 is the current param, t2 is old
                    t2 = transmulti[im][iref]
                    pixelmulti[im][iref] = max_3D_pixel_error(t1, t2, numr[-3])
                    transmulti[im][iref] = t1

                if myid == main_node:
                    print_msg("Time of alignment for model %i: %s\n" %
                              (iref, legibleTime(time() - start_time)))
                    start_time = time()

            # gather scoring data from all processors
            from mpi import mpi_gatherv
            scoremultisend = sum(scoremulti, [])
            pixelmultisend = sum(pixelmulti, [])
            tmp = mpi_gatherv(scoremultisend, len(scoremultisend), MPI_FLOAT,
                              recvcount_score, disps_score, MPI_FLOAT,
                              main_node, MPI_COMM_WORLD)
            tmp1 = mpi_gatherv(pixelmultisend, len(pixelmultisend), MPI_FLOAT,
                               recvcount_score, disps_score, MPI_FLOAT,
                               main_node, MPI_COMM_WORLD)
            tmp = mpi_bcast(tmp, (total_nima * nrefs), MPI_FLOAT, 0,
                            MPI_COMM_WORLD)
            tmp1 = mpi_bcast(tmp1, (total_nima * nrefs), MPI_FLOAT, 0,
                             MPI_COMM_WORLD)
            tmp = map(float, tmp)
            tmp1 = map(float, tmp1)
            score = array(tmp).reshape(-1, nrefs)
            pixelerror = array(tmp1).reshape(-1, nrefs)
            score_local = array(scoremulti)
            mean_score = score.mean(axis=0)
            std_score = score.std(axis=0)
            cut = mean_score - (cutoff * std_score)
            cut2 = mean_score + (cutoff * std_score)
            res_max = score_local.argmax(axis=1)
            minus_cc = [0.0 for x in xrange(nrefs)]
            minus_pix = [0.0 for x in xrange(nrefs)]
            minus_ref = [0.0 for x in xrange(nrefs)]

            #output pixel errors
            if (myid == main_node):
                from statistics import hist_list
                lhist = 20
                pixmin = pixelerror.min(axis=1)
                region, histo = hist_list(pixmin, lhist)
                if (region[0] < 0.0): region[0] = 0.0
                print_msg(
                    "Histogram of pixel errors\n      ERROR       number of particles\n"
                )
                for lhx in xrange(lhist):
                    print_msg(" %10.3f     %7d\n" % (region[lhx], histo[lhx]))
                # Terminate if 95% within 1 pixel error
                im = 0
                for lhx in xrange(lhist):
                    if (region[lhx] > 1.0): break
                    im += histo[lhx]
                print_msg("Percent of particles with pixel error < 1: %f\n\n" %
                          (im / float(total_nima) * 100))
                term_cond = float(term) / 100
                if (im / float(total_nima) > term_cond):
                    terminate = 1
                    print_msg("Terminating internal loop\n")
                del region, histo
            terminate = mpi_bcast(terminate, 1, MPI_INT, 0, MPI_COMM_WORLD)
            terminate = int(terminate[0])

            for im in xrange(nima):
                if (sort == False):
                    data[im].set_attr('group', 999)
                elif (mjump[N_step] == 1):
                    data[im].set_attr('group', int(res_max[im]))

                pix_run = data[im].get_attr('pix_score')
                if (pix_cutoff[N_step] == 1
                        and (terminate == 1 or Iter == max_iter - 1)):
                    if (pixelmulti[im][int(res_max[im])] > 1):
                        data[im].set_attr('pix_score', int(777))

                if (score_local[im][int(res_max[im])] < cut[int(
                        res_max[im])]) or (two_tail and score_local[im][int(
                            res_max[im])] > cut2[int(res_max[im])]):
                    data[im].set_attr('group', int(888))
                    minus_cc[int(res_max[im])] = minus_cc[int(res_max[im])] + 1

                if (pix_run == 777):
                    data[im].set_attr('group', int(777))
                    minus_pix[int(
                        res_max[im])] = minus_pix[int(res_max[im])] + 1

                if (compare_ref_free != "-1") and (ref_free_cutoff[N_step] !=
                                                   -1) and (total_iter > 1):
                    id = data[im].get_attr('ID')
                    if id in rejects:
                        data[im].set_attr('group', int(666))
                        minus_ref[int(
                            res_max[im])] = minus_ref[int(res_max[im])] + 1

            minus_cc_tot = mpi_reduce(minus_cc, nrefs, MPI_FLOAT, MPI_SUM, 0,
                                      MPI_COMM_WORLD)
            minus_pix_tot = mpi_reduce(minus_pix, nrefs, MPI_FLOAT, MPI_SUM, 0,
                                       MPI_COMM_WORLD)
            minus_ref_tot = mpi_reduce(minus_ref, nrefs, MPI_FLOAT, MPI_SUM, 0,
                                       MPI_COMM_WORLD)
            if (myid == main_node):
                if (sort):
                    tot_max = score.argmax(axis=1)
                    res = bincount(tot_max)
                else:
                    res = ones(nrefs) * total_nima
                print_msg("Particle distribution:	     \t\t%s\n" % (res * 1.0))
                afcut1 = res - minus_cc_tot
                afcut2 = afcut1 - minus_pix_tot
                afcut3 = afcut2 - minus_ref_tot
                print_msg("Particle distribution after cc cutoff:\t\t%s\n" %
                          (afcut1))
                print_msg("Particle distribution after pix cutoff:\t\t%s\n" %
                          (afcut2))
                print_msg("Particle distribution after ref cutoff:\t\t%s\n\n" %
                          (afcut3))

            res = [0.0 for i in xrange(nrefs)]
            for iref in xrange(nrefs):
                if (center == -1):
                    from utilities import estimate_3D_center_MPI, rotate_3D_shift
                    dummy = EMData()
                    cs[0], cs[1], cs[2], dummy, dummy = estimate_3D_center_MPI(
                        data, total_nima, myid, number_of_proc, main_node)
                    cs = mpi_bcast(cs, 3, MPI_FLOAT, main_node, MPI_COMM_WORLD)
                    cs = [-float(cs[0]), -float(cs[1]), -float(cs[2])]
                    rotate_3D_shift(data, cs)

                if (sort):
                    group = iref
                    for im in xrange(nima):
                        imgroup = data[im].get_attr('group')
                        if imgroup == iref:
                            data[im].set_attr('xform.projection',
                                              transmulti[im][iref])
                else:
                    group = int(999)
                    for im in xrange(nima):
                        data[im].set_attr('xform.projection',
                                          transmulti[im][iref])
                if (nrefs == 1):
                    modout = ""
                else:
                    modout = "_model_%02d" % (iref)

                fscfile = os.path.join(outdir, "fsc_%s%s" % (itout, modout))
                vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(
                    data,
                    sym,
                    fscmask,
                    fscfile,
                    myid,
                    main_node,
                    index=group,
                    npad=recon_pad)

                if myid == main_node:
                    print_msg("3D reconstruction time for model %i: %s\n" %
                              (iref, legibleTime(time() - start_time)))
                    start_time = time()

                # Compute Fourier variance
                if fourvar:
                    outvar = os.path.join(outdir, "volVar_%s.hdf" % (itout))
                    ssnr_file = os.path.join(outdir, "ssnr_%s" % (itout))
                    varf = varf3d_MPI(data,
                                      ssnr_text_file=ssnr_file,
                                      mask2D=None,
                                      reference_structure=vol[iref],
                                      ou=last_ring,
                                      rw=1.0,
                                      npad=1,
                                      CTF=None,
                                      sign=1,
                                      sym=sym,
                                      myid=myid)
                    if myid == main_node:
                        print_msg(
                            "Time to calculate 3D Fourier variance for model %i: %s\n"
                            % (iref, legibleTime(time() - start_time)))
                        start_time = time()
                        varf = 1.0 / varf
                        varf.write_image(outvar, -1)
                else:
                    varf = None

                if myid == main_node:
                    if helicalrecon:
                        from hfunctions import processHelicalVol

                        vstep = None
                        if vertstep is not None:
                            vstep = (vdp[iref], vdphi[iref])
                        print_msg(
                            "Old rise and twist for model %i     : %8.3f, %8.3f\n"
                            % (iref, dp[iref], dphi[iref]))
                        hvals = processHelicalVol(vol[iref], voleve[iref],
                                                  volodd[iref], iref, outdir,
                                                  itout, dp[iref], dphi[iref],
                                                  apix, hsearch, findseam,
                                                  vstep, wcmask)
                        (vol[iref], voleve[iref], volodd[iref], dp[iref],
                         dphi[iref], vdp[iref], vdphi[iref]) = hvals
                        print_msg(
                            "New rise and twist for model %i     : %8.3f, %8.3f\n"
                            % (iref, dp[iref], dphi[iref]))
                        # get new FSC from symmetrized half volumes
                        fscc = fsc_mask(volodd[iref], voleve[iref], mask3D,
                                        rstep, fscfile)

                        print_msg(
                            "Time to search and apply helical symmetry for model %i: %s\n\n"
                            % (iref, legibleTime(time() - start_time)))
                        start_time = time()
                    else:
                        vol[iref].write_image(
                            os.path.join(outdir, "vol_%s.hdf" % (itout)), -1)

                    if save_half is True:
                        volodd[iref].write_image(
                            os.path.join(outdir, "volodd_%s.hdf" % (itout)),
                            -1)
                        voleve[iref].write_image(
                            os.path.join(outdir, "voleve_%s.hdf" % (itout)),
                            -1)

                    if nmasks > 1:
                        # Read mask for multiplying
                        ref_data[0] = maskF[iref]
                    ref_data[2] = vol[iref]
                    ref_data[3] = fscc
                    ref_data[4] = varf
                    #  call user-supplied function to prepare reference image, i.e., center and filter it
                    vol[iref], cs, fl = ref_ali3d(ref_data)
                    vol[iref].write_image(
                        os.path.join(outdir, "volf_%s.hdf" % (itout)), -1)
                    if (apix == 1):
                        res_msg = "Models filtered at spatial frequency of:\t"
                        res[iref] = fl
                    else:
                        res_msg = "Models filtered at resolution of:       \t"
                        res[iref] = apix / fl

                del varf
                bcast_EMData_to_all(vol[iref], myid, main_node)

                if compare_ref_free != "-1": compare_repro = True
                if compare_repro:
                    outfile_repro = comp_rep(refrings, data, itout, modout,
                                             vol[iref], group, nima, nx, myid,
                                             main_node, outdir)
                    mpi_barrier(MPI_COMM_WORLD)
                    if compare_ref_free != "-1":
                        ref_free_output = os.path.join(
                            outdir, "ref_free_%s%s" % (itout, modout))
                        rejects = compare(compare_ref_free, outfile_repro,
                                          ref_free_output, yrng[N_step],
                                          xrng[N_step], rstep, nx, apix,
                                          ref_free_cutoff[N_step],
                                          number_of_proc, myid, main_node)

            # retrieve alignment params from all processors
            par_str = ['xform.projection', 'ID', 'group']
            if nrefs > 1:
                for iref in xrange(nrefs):
                    par_str.append('eulers_txty.%i' % iref)

            if myid == main_node:
                from utilities import recv_attr_dict
                recv_attr_dict(main_node, stack, data, par_str, image_start,
                               image_end, number_of_proc)

            else:
                send_attr_dict(main_node, data, par_str, image_start,
                               image_end)

            if myid == main_node:
                ares = array2string(array(res), precision=2)
                print_msg("%s%s\n\n" % (res_msg, ares))
                dummy = EMData()
                if full_output:
                    nimat = EMUtil.get_image_count(stack)
                    output_file = os.path.join(outdir, "paramout_%s" % itout)
                    foutput = open(output_file, 'w')
                    for im in xrange(nimat):
                        # save the parameters for each of the models
                        outstring = ""
                        dummy.read_image(stack, im, True)
                        param3d = dummy.get_attr('xform.projection')
                        g = dummy.get_attr("group")
                        # retrieve alignments in EMAN-format
                        pE = param3d.get_params('eman')
                        outstring += "%f\t%f\t%f\t%f\t%f\t%i\n" % (
                            pE["az"], pE["alt"], pE["phi"], pE["tx"], pE["ty"],
                            g)
                        foutput.write(outstring)
                    foutput.close()
                del dummy
            mpi_barrier(MPI_COMM_WORLD)


#	mpi_finalize()

    if myid == main_node: print_end_msg("ali3d_MPI")
Example #4
0
def main(args):
    # 	parser1 = argparse.ArgumentParser(description='This program is used to window particles from a micrograph. The coordinates of the particles are given as input.')
    # 	parser1.add_argument()

    progname = os.path.basename(sys.argv[0])
    usage = progname + " micrographs_list  --coords_dir=coords_dir  --coords_suffix=coords_suffix" + \
                                              "  --coords_extension=coords_extension  --coords_format=coords_format" + \
                                              "  --indir=input_dir  --importctf=ctf_file  --limitctf" + \
                                              "  --resample_ratio=resample_ratio  --box_size=box_size" + \
                                              "  --outdir=outdir  --outsuffix=outsuffix  --micsuffix=micsuffix" + \
                                              "  --nameroot=nameroot  --invert" + \
                                              "  --defocuserror=defocuserror  --astigmatismerror=astigmatismerror"
    parser = OptionParser(usage, version=SPARXVERSION)

    parser.add_option(
        "--coords_dir",
        type="string",
        default=".",
        help=
        "<Coordinates Directory> Directory containing files with particle coordinates. (Default: current directory)"
    )
    parser.add_option(
        "--coords_suffix",
        type="string",
        default="",
        help=
        "<Coordinates File Suffix> Suffix of coordinate files. For example '_ptcls'. "
    )
    parser.add_option(
        "--coords_extension",
        type="string",
        default="box",
        help=
        "<Coordinates File Extension> File extension of coordinate files. e.g 'box' for eman1, 'json' for eman2, ..."
    )
    parser.add_option(
        "--coords_format",
        type="string",
        default="eman1",
        help=
        "<Coordinates File Format> Format of coordinates file: 'sparx', 'eman1', 'eman2', or 'spider'. The coordinates of sparx, eman2, and spider format is particle center. The coordinates of eman1 format is particle box conner associated with the original box size."
    )
    parser.add_option(
        "--indir",
        type="string",
        default=".",
        help=
        "<Micrograph Directory> Directory containing micrographs to be processed. (Default: current directory)"
    )
    parser.add_option(
        "--nameroot",
        type="string",
        default="",
        help=
        "<Micrograph Root Name> Root name (Prefix) of micrographs to be processed."
    )
    parser.add_option(
        "--micsuffix",
        type="string",
        default="hdf",
        help=
        "<Micrograph Extension > A string denoting micrograph type. (Default 'hdf')"
    )
    parser.add_option(
        "--outdir",
        type="string",
        default=".",
        help="<Output Directory> Output directory (Default: current directory)"
    )
    parser.add_option(
        "--outsuffix",
        type="string",
        default="_ptcls",
        help="<Output File Suffix> Suffix for output stack. (Default '_ptcls')"
    )
    parser.add_option(
        "--importctf",
        type="string",
        default="",
        help="<CTER CTF File> File name with CTF parameters produced by sxcter."
    )
    parser.add_option(
        "--box_size",
        type="int",
        default=256,
        help=
        "<Box Size> x and y dimension in pixels of square area to be windowed. Pixel size after resampling is assumed when resample_ratio < 1.0 (Default 256)"
    )
    parser.add_option(
        "--invert",
        action="store_true",
        default=False,
        help=
        "<Invert Contrast> Invert image contrast (recommended for cryo data) (Default, no contrast inversion)"
    )
    parser.add_option(
        "--resample_ratio",
        type="float",
        default=1.0,
        help=
        "<Resample Ratio> Ratio of new to old image size (or old to new pixel size) for resampling. Valid range is 0.0 < resample_ratio <= 1.0. (Default: 1.0)  (advanced)"
    )
    parser.add_option(
        "--limitctf",
        action="store_true",
        default=False,
        help=
        "<Apply CTF-Limit Filter> Filter micrographs based on the CTF limit. It requires --importctf. (Default: no filter) (advanced)"
    )
    parser.add_option(
        "--defocuserror",
        type="float",
        default=1000000.0,
        help=
        "<Defocus Error Limit> Exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocuserror percent.  The error is computed as (std dev defocus)/defocus*100%. (Default: include all irrespective of error values.) (advanced)"
    )
    parser.add_option(
        "--astigmatismerror",
        type="float",
        default=360.0,
        help=
        "<Astigmatism Error Limit> Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatismerror degrees. (Default: include all irrespective of error values.)  (advanced)"
    )

    # must be switched off in production
    # parser.add_option("--use_latest_master_directory", action="store_true", dest="use_latest_master_directory", default=False)
    #
    # parser.add_option("--restart_section", type="string", default="", help="<restart section name> (no spaces) followed immediately by comma, followed immediately by generation to restart, example: \n--restart_section=candidate_class_averages,1         (Sections: restart, candidate_class_averages, reproducible_class_averages)")
    # parser.add_option("--stop_after_candidates",          action="store_true", default=False,   help="<stop_after_candidates> stops after the 'candidate_class_averages' section")
    #
    parser.add_option("--return_options",
                      action="store_true",
                      dest="return_options",
                      default=False,
                      help=SUPPRESS_HELP)

    (options, args) = parser.parse_args(args)

    if options.return_options:
        return parser

# 	Set local constants
    box_size = options.box_size
    box_half = box_size // 2
    options.micsuffix = "." + options.micsuffix
    cterr = [options.defocuserror / 100.0, options.astigmatismerror]

    check_options(options, progname)

    extension_coord = options.coords_suffix + "." + options.coords_extension

    # 	Build micrograph basename list
    micnames = build_micnames(options, args)
    print_msg('Detected micrographs : %6d ...\n' % (len(micnames)))

    # 	If there is no micrographs, exit
    if len(micnames) == 0:
        print usage
        sys.exit()

# 	Load CTFs
    n_reject_defocus_error = 0
    if options.importctf != None:
        ctfs0 = read_text_row(options.importctf)
        print_msg('Detected CTF entries : %6d ...\n' % (len(ctfs0)))

        ctfs = {}
        for i in xrange(len(ctfs0)):
            ctf = ctfs0[i]
            basemic = baseroot(ctf[-1])
            if (ctf[8] / ctf[0] > cterr[0]):
                print_msg(
                    'Defocus error %f exceeds the threshold. Micrograph %s rejected.\n'
                    % (ctf[8] / ctf[0], basemic))
                n_reject_defocus_error += 1
            else:
                if (ctf[10] > cterr[1]):
                    ctf[6] = 0.0
                    ctf[7] = 0.0
                ctfs[basemic] = ctf
        print_msg('Rejected micrographs by defocus error  : %6d ...\n' %
                  (n_reject_defocus_error))

# 	Create circular 2D mask for ...
    mask = model_circle(box_size // 2, box_size, box_size)

    # 	Prepare loop variables
    n_micrographs_process = 0
    n_micrographs_reject_no_micrograph = 0
    n_micrographs_reject_no_coordinates = 0
    n_micrographs_reject_no_cter_entry = 0
    n_total_coordinates_detect = 0
    n_total_coordinates_process = 0
    n_total_coordinates_reject_out_of_boundary = 0
    cutoffhistogram = [
    ]  #@ming compute the histogram for micrographs cut of by ctf limit.
    # 	Loop over micrographs
    for k in range(len(micnames)):
        # basename is name of micrograph minus the path and extension
        # Here, assuming micrograph and coordinates have the same file basename
        basename = micnames[k]
        f_mic = os.path.join(os.path.abspath(options.indir),
                             basename + options.micsuffix)
        f_info = os.path.join(options.coords_dir, basename + extension_coord)

        # 		CHECKS: BEGIN
        # 		IF micrograph exists
        if not os.path.exists(f_mic):
            print_msg('    Cannot read %s. Skipping %s ...\n' %
                      (f_mic, basename))
            n_micrographs_reject_no_micrograph += 1
            continue

# 		IF coordinates file exists
        if not os.path.exists(f_info):
            print_msg('    Cannot read %s. Skipping %s ...\n' %
                      (f_info, basename))
            n_micrographs_reject_no_coordinates += 1
            continue

# 		IF micrograph is in CTER results
        if options.importctf != None:
            if basename not in ctfs:
                print_msg(
                    '    Is not listed in CTER results, skipping %s...\n' %
                    (basename))
                n_micrographs_reject_no_cter_entry += 1
                continue
            else:
                ctf = ctfs[basename]
# 		CHECKS: END

        n_micrographs_process += 1

        print_msg('\n')
        print_msg(
            'Processing micrograph %s... Path: %s... Coordinates file %s\n' %
            (basename, f_mic, f_info))

        # 		Read coordinates according to the specified format and
        # 		make the coordinates the center of particle image
        if options.coords_format.lower() == 'sparx':
            coords = read_text_row(f_info)
        elif options.coords_format.lower() == 'eman1':
            coords = read_text_row(f_info)
            for i in range(len(coords)):
                coords[i] = [
                    coords[i][0] + coords[i][2] // 2,
                    coords[i][1] + coords[i][3] // 2
                ]
        elif options.coords_format.lower() == 'eman2':
            coords = js_open_dict(f_info)["boxes"]
            for i in range(len(coords)):
                coords[i] = [coords[i][0], coords[i][1]]
        elif options.coords_format.lower() == 'spider':
            coords = read_text_row(f_info)
            for i in range(len(coords)):
                coords[i] = [coords[i][2], coords[i][3]]
        else:
            assert (False)  # Unreachable code

# 		Load micrograph from the file
        immic = get_im(f_mic)

        # 		Calculate the new pixel size
        resample_ratio = options.resample_ratio
        if options.importctf != None:
            pixel_size_orig = ctf[3]

            if resample_ratio < 1.0:
                assert (resample_ratio > 0.0)
                new_pixel_size = pixel_size_orig / resample_ratio
                print_msg(
                    'Resample micrograph to pixel size %6.4f and window segments from resampled micrograph\n'
                    % new_pixel_size)
            else:
                # assert(resample_ratio == 1.0)
                new_pixel_size = pixel_size_orig

# 			Set ctf along with new pixel size in resampled micrograph
            ctf[3] = new_pixel_size
        else:
            assert (options.importctf == None)
            if resample_ratio < 1.0:
                assert (resample_ratio > 0.0)
                print_msg(
                    'Resample micrograph with ratio %6.4f and window segments from resampled micrograph\n'
                    % resample_ratio)
            # else:
            #	assert(resample_ratio == 1.0)

# 		Apply filters to micrograph
        fftip(immic)
        if options.limitctf:
            assert (options.importctf != None)
            # 			Cut off frequency components higher than CTF limit
            q1, q2 = ctflimit(box_size, ctf[0], ctf[1], ctf[2], new_pixel_size)

            # 			This is absolute frequency of the CTF limit in the scale of original micrograph
            if resample_ratio < 1.0:
                assert (resample_ratio > 0.0)
                q1 = resample_ratio * q1 / float(
                    box_size
                )  # q1 = (pixel_size_orig / new_pixel_size) * q1/float(box_size)
            else:
                # assert(resample_ratio == 1.0) -> pixel_size_orig == new_pixel_size -> pixel_size_orig / new_pixel_size == 1.0
                q1 = q1 / float(box_size)

            if q1 < 0.5:  #@ming
                immic = filt_tanl(immic, q1, 0.01)
                cutoffhistogram.append(q1)

# 		Cut off frequency components lower than the box size can express
        immic = fft(filt_gaussh(immic, resample_ratio / box_size))

        # 		Resample micrograph, map coordinates, and window segments from resampled micrograph using new coordinates
        # 		after resampling by resample_ratio, new pixel size will be pixel_size/resample_ratio = new_pixel_size
        #		NOTE: 2015/04/13 Toshio Moriya
        #		resample() efficiently takes care of the case resample_ratio = 1.0 but
        #		it does not set apix_*. Even though it sets apix_* when resample_ratio < 1.0 ...
        immic = resample(immic, resample_ratio)

        if options.invert:
            stt = Util.infomask(immic, None, True)
            Util.mul_scalar(immic, -1.0)
            immic += 2 * stt[0]

        if options.importctf != None:
            from utilities import generate_ctf
            ctf = generate_ctf(ctf)

# 		Prepare loop variables
        nx = immic.get_xsize()
        ny = immic.get_ysize()
        x0 = nx // 2
        y0 = ny // 2
        print_msg('\n')
        print_msg('Micrograph size := (%6d, %6d)\n' % (nx, ny))

        otcl_images = "bdb:%s/" % options.outdir + basename + options.outsuffix
        ind = 0

        n_coordinates_reject_out_of_boundary = 0

        # 		Loop over coordinates
        for i in range(len(coords)):

            source_x = int(coords[i][0])
            source_y = int(coords[i][1])

            x = source_x
            y = source_y

            if resample_ratio < 1.0:
                assert (resample_ratio > 0.0)
                x = int(x * resample_ratio)
                y = int(y * resample_ratio)
            # else:
            # 	assert(resample_ratio == 1.0)

            if ((0 <= x - box_half) and (x + box_half <= nx)
                    and (0 <= y - box_half) and (y + box_half <= ny)):
                imw = Util.window(immic, box_size, box_size, 1, x - x0, y - y0)
            else:
                print_msg(
                    'Coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ....\n'
                    % (i, x, y, box_size))
                n_coordinates_reject_out_of_boundary += 1
                continue

            imw = ramp(imw)
            stat = Util.infomask(imw, mask, False)
            imw -= stat[0]
            imw /= stat[1]

            #			NOTE: 2015/04/09 Toshio Moriya
            #		    ptcl_source_image might be redundant information ...
            #		    Consider re-organizing header entries...
            imw.set_attr("ptcl_source_image", f_mic)
            imw.set_attr("ptcl_source_coord_id", i)
            imw.set_attr("ptcl_source_coord", [source_x, source_y])
            imw.set_attr("resample_ratio", resample_ratio)

            #			NOTE: 2015/04/13 Toshio Moriya
            #			apix_* attributes are updated by resample() only when resample_ratio != 1.0
            # 			Let's make sure header info is consistent by setting apix_* = 1.0
            #			regardless of options, so it is not passed down the processing line
            imw.set_attr("apix_x", 1.0)
            imw.set_attr("apix_y", 1.0)
            imw.set_attr("apix_z", 1.0)
            if options.importctf != None:
                imw.set_attr("ctf", ctf)
                imw.set_attr("ctf_applied", 0)
                imw.set_attr("pixel_size_orig", pixel_size_orig)
                # imw.set_attr("apix_x", new_pixel_size)
                # imw.set_attr("apix_y", new_pixel_size)
                # imw.set_attr("apix_z", new_pixel_size)
#			NOTE: 2015/04/13 Toshio Moriya
#			Pawel Comment: Micrograph is not supposed to have CTF header info.
#			So, let's assume it does not exist & ignore its presence.
#           Note that resample() "correctly" updates pixel size of CTF header info if it exists
# elif (imw.has_ctff()):
# 	assert(options.importctf == None)
# 	ctf_origin = imw.get_attr("ctf")
# 	pixel_size_origin = round(ctf_origin.apix, 5) # Because SXCTER ouputs up to 5 digits
# 	imw.set_attr("apix_x",pixel_size_origin)
# 	imw.set_attr("apix_y",pixel_size_origin)
# 	imw.set_attr("apix_z",pixel_size_origin)

            imw.write_image(otcl_images, ind)
            ind += 1

        n_total_coordinates_detect += len(coords)
        n_total_coordinates_process += ind
        n_total_coordinates_reject_out_of_boundary += n_coordinates_reject_out_of_boundary

        #		Print out the summary of this micrograph
        print_msg('\n')
        print_msg('Micrograph summary of coordinates...\n')
        print_msg('Detected                        : %4d\n' % (len(coords)))
        print_msg('Processed                       : %4d\n' % (ind))
        print_msg('Rejected by out of boundary     : %4d\n' %
                  (n_coordinates_reject_out_of_boundary))

    if options.limitctf:
        #		Print out the summary of CTF-limit filtering
        print_msg('\n')
        print_msg('Global summary of CTF-limit filtering (--limitctf) ...\n')
        print_msg('Percentage of filtered micrographs: %8.2f\n' %
                  (len(cutoffhistogram) * 100.0 / len(micnames)))

        lhist = 10
        if len(cutoffhistogram) >= lhist:
            from statistics import hist_list
            region, hist = hist_list(cutoffhistogram, lhist)
            print_msg("      Histogram of cut off frequency\n")
            print_msg("      ERROR       number of frequencies\n")
            for lhx in xrange(lhist):
                print_msg(
                    " %14.7f     %7d\n" % (region[lhx], hist[lhx])
                )  # print_msg(" %10.3f     %7d\n" % (region[lhx], hist[lhx]))
        else:
            print_msg(
                "The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced.\n"
                % (len(cutoffhistogram), lhist))


#	Print out the summary of all micrographs
    print_msg('\n')
    print_msg('Global summary of micrographs ...\n')
    print_msg('Detected                        : %6d\n' % (len(micnames)))
    print_msg('Processed                       : %6d\n' %
              (n_micrographs_process))
    print_msg('Rejected by no micrograph file  : %6d\n' %
              (n_micrographs_reject_no_micrograph))
    print_msg('Rejected by no coordinates file : %6d\n' %
              (n_micrographs_reject_no_coordinates))
    print_msg('Rejected by no CTER entry       : %6d\n' %
              (n_micrographs_reject_no_cter_entry))
    print_msg('\n')
    print_msg('Global summary of coordinates ...\n')
    print_msg('Detected                        : %6d\n' %
              (n_total_coordinates_detect))
    print_msg('Processed                       : %6d\n' %
              (n_total_coordinates_process))
    print_msg('Rejected by out of boundary     : %6d\n' %
              (n_total_coordinates_reject_out_of_boundary))
    print_msg('\n')
Example #5
0
def main(args):
# 	parser1 = argparse.ArgumentParser(description='This program is used to window particles from a micrograph. The coordinates of the particles are given as input.')
# 	parser1.add_argument()

	progname = os.path.basename(sys.argv[0])
	usage = progname + " micrographs_list  --coords_dir=coords_dir  --coords_suffix=coords_suffix" + \
	                                          "  --coords_extension=coords_extension  --coords_format=coords_format" + \
	                                          "  --indir=input_dir  --importctf=ctf_file  --limitctf" + \
	                                          "  --resample_ratio=resample_ratio  --box_size=box_size" + \
	                                          "  --outdir=outdir  --outsuffix=outsuffix  --micsuffix=micsuffix" + \
	                                          "  --nameroot=nameroot  --invert" + \
	                                          "  --defocuserror=defocuserror  --astigmatismerror=astigmatismerror"
	parser = OptionParser(usage, version=SPARXVERSION)

	parser.add_option("--coords_dir",       type="string",        default=".",       help="<Coordinates Directory> Directory containing files with particle coordinates. (Default: current directory)")
	parser.add_option("--coords_suffix",    type="string",        default="",        help="<Coordinates File Suffix> Suffix of coordinate files. For example '_ptcls'. ")
	parser.add_option("--coords_extension", type="string",        default="box",     help="<Coordinates File Extension> File extension of coordinate files. e.g 'box' for eman1, 'json' for eman2, ...")
	parser.add_option("--coords_format",    type="string",        default="eman1",   help="<Coordinates File Format> Format of coordinates file: 'sparx', 'eman1', 'eman2', or 'spider'. The coordinates of sparx, eman2, and spider format is particle center. The coordinates of eman1 format is particle box conner associated with the original box size.")
	parser.add_option("--indir",            type="string",        default=".",       help="<Micrograph Directory> Directory containing micrographs to be processed. (Default: current directory)")
	parser.add_option("--nameroot",         type="string",        default="",        help="<Micrograph Root Name> Root name (Prefix) of micrographs to be processed.")
	parser.add_option("--micsuffix",        type="string",        default="hdf",     help="<Micrograph Extension > A string denoting micrograph type. (Default 'hdf')")
	parser.add_option("--outdir",           type="string",        default=".",       help="<Output Directory> Output directory (Default: current directory)")
	parser.add_option("--outsuffix",        type="string",        default="_ptcls",  help="<Output File Suffix> Suffix for output stack. (Default '_ptcls')")
	parser.add_option("--importctf",        type="string",        default="",        help="<CTER CTF File> File name with CTF parameters produced by sxcter.") 
	parser.add_option("--box_size",         type="int",           default=256,       help="<Box Size> x and y dimension in pixels of square area to be windowed. Pixel size after resampling is assumed when resample_ratio < 1.0 (Default 256)")
	parser.add_option("--invert",           action="store_true",  default=False,     help="<Invert Contrast> Invert image contrast (recommended for cryo data) (Default, no contrast inversion)")
	parser.add_option("--resample_ratio",   type="float",         default=1.0,       help="<Resample Ratio> Ratio of new to old image size (or old to new pixel size) for resampling. Valid range is 0.0 < resample_ratio <= 1.0. (Default: 1.0)  (advanced)")
	parser.add_option("--limitctf",         action="store_true",  default=False,     help="<Apply CTF-Limit Filter> Filter micrographs based on the CTF limit. It requires --importctf. (Default: no filter) (advanced)")	
	parser.add_option("--defocuserror",     type="float",         default=1000000.0, help="<Defocus Error Limit> Exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocuserror percent.  The error is computed as (std dev defocus)/defocus*100%. (Default: include all irrespective of error values.) (advanced)" )
	parser.add_option("--astigmatismerror", type="float",         default=360.0,     help="<Astigmatism Error Limit> Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatismerror degrees. (Default: include all irrespective of error values.)  (advanced)")
	
	# must be switched off in production
	# parser.add_option("--use_latest_master_directory", action="store_true", dest="use_latest_master_directory", default=False)
	# 
	# parser.add_option("--restart_section", type="string", default="", help="<restart section name> (no spaces) followed immediately by comma, followed immediately by generation to restart, example: \n--restart_section=candidate_class_averages,1         (Sections: restart, candidate_class_averages, reproducible_class_averages)")
	# parser.add_option("--stop_after_candidates",          action="store_true", default=False,   help="<stop_after_candidates> stops after the 'candidate_class_averages' section")
	# 
	parser.add_option("--return_options", action="store_true", dest="return_options", default=False, help = SUPPRESS_HELP)

	(options, args) = parser.parse_args(args)

	if options.return_options:
		return parser
	
# 	Set local constants
	box_size = options.box_size
	box_half = box_size // 2
	options.micsuffix = "." + options.micsuffix
	cterr = [options.defocuserror/100.0, options.astigmatismerror]
		
	check_options(options, progname)
	
	extension_coord = options.coords_suffix + "." + options.coords_extension
	
# 	Build micrograph basename list
	micnames = build_micnames(options, args)
	print_msg('Detected micrographs : %6d ...\n' % (len(micnames)))
	
# 	If there is no micrographs, exit
	if len(micnames) == 0:
		print usage
		sys.exit()
	
# 	Load CTFs
	n_reject_defocus_error = 0
	if options.importctf != None:
		ctfs0 = read_text_row(options.importctf)
		print_msg('Detected CTF entries : %6d ...\n' % (len(ctfs0)))

		ctfs={}
		for i in xrange(len(ctfs0)):
			ctf=ctfs0[i]
			basemic = baseroot(ctf[-1])
			if(ctf[8]/ctf[0] > cterr[0]):
				print_msg('Defocus error %f exceeds the threshold. Micrograph %s rejected.\n' % (ctf[8]/ctf[0], basemic))
				n_reject_defocus_error += 1
			else:
				if(ctf[10] > cterr[1] ):
					ctf[6] = 0.0
					ctf[7] = 0.0
				ctfs[basemic] = ctf
		print_msg('Rejected micrographs by defocus error  : %6d ...\n' % (n_reject_defocus_error))
	
# 	Create circular 2D mask for ...
	mask = model_circle(box_size//2, box_size, box_size)

# 	Prepare loop variables
	n_micrographs_process = 0
	n_micrographs_reject_no_micrograph = 0
	n_micrographs_reject_no_coordinates = 0
	n_micrographs_reject_no_cter_entry = 0
	n_total_coordinates_detect = 0
	n_total_coordinates_process = 0
	n_total_coordinates_reject_out_of_boundary = 0
	cutoffhistogram = []		#@ming compute the histogram for micrographs cut of by ctf limit.
# 	Loop over micrographs
	for k in range(len(micnames)):
		# basename is name of micrograph minus the path and extension
		# Here, assuming micrograph and coordinates have the same file basename
		basename = micnames[k]
		f_mic    = os.path.join(os.path.abspath(options.indir), basename + options.micsuffix)
		f_info   = os.path.join(options.coords_dir, basename + extension_coord)

# 		CHECKS: BEGIN
# 		IF micrograph exists
		if not os.path.exists(f_mic):
			print_msg('    Cannot read %s. Skipping %s ...\n' % (f_mic, basename))
			n_micrographs_reject_no_micrograph += 1
			continue
		
# 		IF coordinates file exists
		if not os.path.exists(f_info):
			print_msg('    Cannot read %s. Skipping %s ...\n' % (f_info, basename))
			n_micrographs_reject_no_coordinates += 1
			continue
		
# 		IF micrograph is in CTER results
		if options.importctf != None:
			if basename not in ctfs:
				print_msg('    Is not listed in CTER results, skipping %s...\n' % (basename))
				n_micrographs_reject_no_cter_entry += 1
				continue
			else:
				ctf = ctfs[basename]		
# 		CHECKS: END

		n_micrographs_process += 1
		
		print_msg('\n')
		print_msg('Processing micrograph %s... Path: %s... Coordinates file %s\n' % (basename, f_mic, f_info))
	
# 		Read coordinates according to the specified format and 
# 		make the coordinates the center of particle image 
		if options.coords_format.lower() == 'sparx' :
			coords = read_text_row(f_info)
		elif options.coords_format.lower() == 'eman1':
			coords = read_text_row(f_info)
			for i in range(len(coords)):
				coords[i] = [coords[i][0] + coords[i][2]//2  ,coords[i][1] + coords[i][3]//2]
		elif options.coords_format.lower() == 'eman2':
			coords = js_open_dict(f_info)["boxes"]
			for i in range(len(coords)):
				coords[i] = [coords[i][0],coords[i][1]]
		elif options.coords_format.lower() == 'spider':
			coords = read_text_row(f_info)
			for i in range(len(coords)):
				coords[i] = [coords[i][2] ,coords[i][3]]
		else:
			assert(False) # Unreachable code
		
# 		Load micrograph from the file
		immic = get_im(f_mic)
		
# 		Calculate the new pixel size
		resample_ratio = options.resample_ratio
		if options.importctf != None:		
			pixel_size_orig = ctf[3]
			
			if resample_ratio < 1.0:
				assert(resample_ratio > 0.0)
				new_pixel_size = pixel_size_orig / resample_ratio
				print_msg('Resample micrograph to pixel size %6.4f and window segments from resampled micrograph\n' % new_pixel_size)
			else:
				# assert(resample_ratio == 1.0)
				new_pixel_size = pixel_size_orig
		
# 			Set ctf along with new pixel size in resampled micrograph
			ctf[3] = new_pixel_size	
		else:
			assert(options.importctf == None)
			if resample_ratio < 1.0:
				assert(resample_ratio > 0.0)
				print_msg('Resample micrograph with ratio %6.4f and window segments from resampled micrograph\n' % resample_ratio)
			# else:
			#	assert(resample_ratio == 1.0)
			
# 		Apply filters to micrograph
		fftip(immic)
		if options.limitctf:
			assert(options.importctf != None)
# 			Cut off frequency components higher than CTF limit 
			q1, q2 = ctflimit(box_size,ctf[0],ctf[1],ctf[2],new_pixel_size)
			
# 			This is absolute frequency of the CTF limit in the scale of original micrograph
			if resample_ratio < 1.0:
				assert(resample_ratio > 0.0)
				q1 = resample_ratio * q1 / float(box_size) # q1 = (pixel_size_orig / new_pixel_size) * q1/float(box_size)
			else:
				# assert(resample_ratio == 1.0) -> pixel_size_orig == new_pixel_size -> pixel_size_orig / new_pixel_size == 1.0
				q1 = q1 / float(box_size)
			
			if q1 < 0.5:          #@ming
				immic = filt_tanl(immic, q1, 0.01)
				cutoffhistogram.append(q1)
		
# 		Cut off frequency components lower than the box size can express 
		immic = fft(filt_gaussh( immic, resample_ratio/box_size ))
		
# 		Resample micrograph, map coordinates, and window segments from resampled micrograph using new coordinates
# 		after resampling by resample_ratio, new pixel size will be pixel_size/resample_ratio = new_pixel_size
#		NOTE: 2015/04/13 Toshio Moriya
#		resample() efficiently takes care of the case resample_ratio = 1.0 but
#		it does not set apix_*. Even though it sets apix_* when resample_ratio < 1.0 ...
		immic = resample(immic, resample_ratio)
				
		if options.invert:
			stt = Util.infomask(immic, None, True)
			Util.mul_scalar(immic, -1.0)
			immic += 2*stt[0]
		
		if options.importctf != None:
			from utilities import generate_ctf
			ctf = generate_ctf(ctf)

# 		Prepare loop variables
		nx = immic.get_xsize() 
		ny = immic.get_ysize()
		x0 = nx//2
		y0 = ny//2
		print_msg('\n')
		print_msg('Micrograph size := (%6d, %6d)\n' % (nx, ny))

		otcl_images  = "bdb:%s/" % options.outdir + basename + options.outsuffix
		ind = 0
		
		n_coordinates_reject_out_of_boundary = 0
		
# 		Loop over coordinates
		for i in range(len(coords)):
		
			source_x = int(coords[i][0])
			source_y = int(coords[i][1])
					
			x = source_x		
			y = source_y
			
			if resample_ratio < 1.0:
				assert(resample_ratio > 0.0)
				x = int(x * resample_ratio)	
				y = int(y * resample_ratio)
			# else:
			# 	assert(resample_ratio == 1.0)
				
			if( (0 <= x - box_half) and ( x + box_half <= nx ) and (0 <= y - box_half) and ( y + box_half <= ny ) ):
				imw = Util.window(immic, box_size, box_size, 1, x-x0, y-y0)
			else:
				print_msg('Coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ....\n' % (i, x, y, box_size))
				n_coordinates_reject_out_of_boundary += 1
				continue
			
			imw = ramp(imw)
			stat = Util.infomask( imw, mask, False )
			imw -= stat[0]
			imw /= stat[1]

#			NOTE: 2015/04/09 Toshio Moriya
#		    ptcl_source_image might be redundant information ...
#		    Consider re-organizing header entries...
			imw.set_attr("ptcl_source_image", f_mic)
			imw.set_attr("ptcl_source_coord_id", i)
			imw.set_attr("ptcl_source_coord", [source_x, source_y])
			imw.set_attr("resample_ratio", resample_ratio)
						
#			NOTE: 2015/04/13 Toshio Moriya
#			apix_* attributes are updated by resample() only when resample_ratio != 1.0
# 			Let's make sure header info is consistent by setting apix_* = 1.0 
#			regardless of options, so it is not passed down the processing line
			imw.set_attr("apix_x", 1.0)
			imw.set_attr("apix_y", 1.0)
			imw.set_attr("apix_z", 1.0)			
			if options.importctf != None:
				imw.set_attr("ctf",ctf)
				imw.set_attr("ctf_applied", 0)
				imw.set_attr("pixel_size_orig", pixel_size_orig)
				# imw.set_attr("apix_x", new_pixel_size)
				# imw.set_attr("apix_y", new_pixel_size)
				# imw.set_attr("apix_z", new_pixel_size)
#			NOTE: 2015/04/13 Toshio Moriya 
#			Pawel Comment: Micrograph is not supposed to have CTF header info.
#			So, let's assume it does not exist & ignore its presence.
#           Note that resample() "correctly" updates pixel size of CTF header info if it exists
			# elif (imw.has_ctff()):
			# 	assert(options.importctf == None)
			# 	ctf_origin = imw.get_attr("ctf")
			# 	pixel_size_origin = round(ctf_origin.apix, 5) # Because SXCTER ouputs up to 5 digits 
			# 	imw.set_attr("apix_x",pixel_size_origin)
			# 	imw.set_attr("apix_y",pixel_size_origin)
			# 	imw.set_attr("apix_z",pixel_size_origin)	
			
			imw.write_image(otcl_images, ind)
			ind += 1
		
		n_total_coordinates_detect += len(coords)
		n_total_coordinates_process += ind
		n_total_coordinates_reject_out_of_boundary += n_coordinates_reject_out_of_boundary
		
#		Print out the summary of this micrograph
		print_msg('\n')
		print_msg('Micrograph summary of coordinates...\n')
		print_msg('Detected                        : %4d\n' % (len(coords)))
		print_msg('Processed                       : %4d\n' % (ind))
		print_msg('Rejected by out of boundary     : %4d\n' % (n_coordinates_reject_out_of_boundary))
	
	if options.limitctf:
#		Print out the summary of CTF-limit filtering
		print_msg('\n')
		print_msg('Global summary of CTF-limit filtering (--limitctf) ...\n')
		print_msg('Percentage of filtered micrographs: %8.2f\n' % (len(cutoffhistogram) * 100.0 / len(micnames)))
		
		lhist = 10
		if len(cutoffhistogram) >= lhist:
			from statistics import hist_list
			region,hist = hist_list(cutoffhistogram, lhist)	
			print_msg("      Histogram of cut off frequency\n")
			print_msg("      ERROR       number of frequencies\n")
			for lhx in xrange(lhist):
				print_msg(" %14.7f     %7d\n" % (region[lhx], hist[lhx]))  # print_msg(" %10.3f     %7d\n" % (region[lhx], hist[lhx]))  
		else:
			print_msg("The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced.\n" % (len(cutoffhistogram), lhist))
		
#	Print out the summary of all micrographs
	print_msg('\n')
	print_msg('Global summary of micrographs ...\n')
	print_msg('Detected                        : %6d\n' % (len(micnames)))
	print_msg('Processed                       : %6d\n' % (n_micrographs_process))
	print_msg('Rejected by no micrograph file  : %6d\n' % (n_micrographs_reject_no_micrograph))
	print_msg('Rejected by no coordinates file : %6d\n' % (n_micrographs_reject_no_coordinates))
	print_msg('Rejected by no CTER entry       : %6d\n' % (n_micrographs_reject_no_cter_entry))
	print_msg('\n')
	print_msg('Global summary of coordinates ...\n')
	print_msg('Detected                        : %6d\n' % (n_total_coordinates_detect))
	print_msg('Processed                       : %6d\n' % (n_total_coordinates_process))
	print_msg('Rejected by out of boundary     : %6d\n' % (n_total_coordinates_reject_out_of_boundary))
	print_msg('\n')
Example #6
0
def main():
	progname = os.path.basename(sys.argv[0])
	usage = progname + """  input_micrograph_list_file  input_micrograph_pattern  input_coordinates_pattern  output_directory  --coordinates_format  --box_size=box_size  --invert  --import_ctf=ctf_file  --limit_ctf  --resample_ratio=resample_ratio  --defocus_error=defocus_error  --astigmatism_error=astigmatism_error
	
Window particles from micrographs in input list file. The coordinates of the particles should be given as input.
Please specify name pattern of input micrographs and coordinates files with a wild card (*). Use the wild card to indicate the place of micrograph ID (e.g. serial number, time stamp, and etc). 
The name patterns must be enclosed by single quotes (') or double quotes ("). (Note: sxgui.py automatically adds single quotes (')). 
BDB files can not be selected as input micrographs.
	
	sxwindow.py  mic_list.txt  ./mic*.hdf  info/mic*_info.json  particles  --coordinates_format=eman2  --box_size=64  --invert  --import_ctf=outdir_cter/partres/partres.txt
	
If micrograph list file name is not provided, all files matched with the micrograph name pattern will be processed.
	
	sxwindow.py  ./mic*.hdf  info/mic*_info.json  particles  --coordinates_format=eman2  --box_size=64  --invert  --import_ctf=outdir_cter/partres/partres.txt
	
"""
	parser = OptionParser(usage, version=SPARXVERSION)
	parser.add_option("--coordinates_format",  type="string",        default="eman1",   help="format of input coordinates files: 'sparx', 'eman1', 'eman2', or 'spider'. the coordinates of sparx, eman2, and spider format is particle center. the coordinates of eman1 format is particle box conner associated with the original box size. (default eman1)")
	parser.add_option("--box_size",            type="int",           default=256,       help="x and y dimension of square area to be windowed (in pixels): pixel size after resampling is assumed when resample_ratio < 1.0 (default 256)")
	parser.add_option("--invert",              action="store_true",  default=False,     help="invert image contrast: recommended for cryo data (default False)")
	parser.add_option("--import_ctf",          type="string",        default="",        help="file name of sxcter output: normally partres.txt (default none)") 
	parser.add_option("--limit_ctf",           action="store_true",  default=False,     help="filter micrographs based on the CTF limit: this option requires --import_ctf. (default False)")	
	parser.add_option("--resample_ratio",      type="float",         default=1.0,       help="ratio of new to old image size (or old to new pixel size) for resampling: Valid range is 0.0 < resample_ratio <= 1.0. (default 1.0)")
	parser.add_option("--defocus_error",       type="float",         default=1000000.0, help="defocus errror limit: exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocus_error percent. the error is computed as (std dev defocus)/defocus*100%. (default 1000000.0)" )
	parser.add_option("--astigmatism_error",   type="float",         default=360.0,     help="astigmatism error limit: Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatism_error degrees. (default 360.0)")

	### detect if program is running under MPI
	RUNNING_UNDER_MPI = "OMPI_COMM_WORLD_SIZE" in os.environ
	
	main_node = 0
	
	if RUNNING_UNDER_MPI:
		from mpi import mpi_init
		from mpi import MPI_COMM_WORLD, mpi_comm_rank, mpi_comm_size, mpi_barrier, mpi_reduce, MPI_INT, MPI_SUM
		
		
		mpi_init(0, [])
		myid = mpi_comm_rank(MPI_COMM_WORLD)
		number_of_processes = mpi_comm_size(MPI_COMM_WORLD)
	else:
		number_of_processes = 1
		myid = 0
	
	(options, args) = parser.parse_args(sys.argv[1:])
	
	mic_list_file_path = None
	mic_pattern = None
	coords_pattern = None
	error_status = None
	while True:
		if len(args) < 3 or len(args) > 4:
			error_status = ("Please check usage for number of arguments.\n Usage: " + usage + "\n" + "Please run %s -h for help." % (progname), getframeinfo(currentframe()))
			break
		
		if len(args) == 3:
			mic_pattern = args[0]
			coords_pattern = args[1]
			out_dir = args[2]
		else: # assert(len(args) == 4)
			mic_list_file_path = args[0]
			mic_pattern = args[1]
			coords_pattern = args[2]
			out_dir = args[3]
		
		if mic_list_file_path != None:
			if os.path.splitext(mic_list_file_path)[1] != ".txt":
				error_status = ("Extension of input micrograph list file must be \".txt\". Please check input_micrograph_list_file argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
				break
		
		if mic_pattern[:len("bdb:")].lower() == "bdb":
			error_status = ("BDB file can not be selected as input micrographs. Please convert the format, and restart the program. Run %s -h for help." % (progname), getframeinfo(currentframe()))
			break
		
		if mic_pattern.find("*") == -1:
			error_status = ("Input micrograph file name pattern must contain wild card (*). Please check input_micrograph_pattern argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
			break
		
		if coords_pattern.find("*") == -1:
			error_status = ("Input coordinates file name pattern must contain wild card (*). Please check input_coordinates_pattern argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
			break
		
		if myid == main_node:
			if os.path.exists(out_dir):
				error_status = ("Output directory exists. Please change the name and restart the program.", getframeinfo(currentframe()))
				break

		break
	if_error_then_all_processes_exit_program(error_status)
	
	# Check invalid conditions of options
	check_options(options, progname)
	
	mic_name_list = None
	error_status = None
	if myid == main_node:
		if mic_list_file_path != None:
			print("Loading micrograph list from %s file ..." % (mic_list_file_path))
			mic_name_list = read_text_file(mic_list_file_path)
			if len(mic_name_list) == 0:
				print("Directory of first micrograph entry is " % (os.path.dirname(mic_name_list[0])))
		else: # assert (mic_list_file_path == None)
			print("Generating micrograph list in %s directory..." % (os.path.dirname(mic_pattern)))
			mic_name_list = glob.glob(mic_pattern)
		if len(mic_name_list) == 0:
			error_status = ("No micrograph file is found. Please check input_micrograph_pattern and/or input_micrograph_list_file argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
		else:
			print("Found %d microgarphs" % len(mic_name_list))
			
	if_error_then_all_processes_exit_program(error_status)
	if RUNNING_UNDER_MPI:
		mic_name_list = wrap_mpi_bcast(mic_name_list, main_node)
	
	coords_name_list = None
	error_status = None
	if myid == main_node:
		coords_name_list = glob.glob(coords_pattern)
		if len(coords_name_list) == 0:
			error_status = ("No coordinates file is found. Please check input_coordinates_pattern argument. Run %s -h for help." % (progname), getframeinfo(currentframe()))
	if_error_then_all_processes_exit_program(error_status)
	if RUNNING_UNDER_MPI:
		coords_name_list = wrap_mpi_bcast(coords_name_list, main_node)
	
##################################################################################################################################################################################################################	
##################################################################################################################################################################################################################	
##################################################################################################################################################################################################################	

	# all processes must have access to indices
	if options.import_ctf:
		i_enum = -1
		i_enum += 1; idx_cter_def          = i_enum # defocus [um]; index must be same as ctf object format
		i_enum += 1; idx_cter_cs           = i_enum # Cs [mm]; index must be same as ctf object format
		i_enum += 1; idx_cter_vol          = i_enum # voltage[kV]; index must be same as ctf object format
		i_enum += 1; idx_cter_apix         = i_enum # pixel size [A]; index must be same as ctf object format
		i_enum += 1; idx_cter_bfactor      = i_enum # B-factor [A^2]; index must be same as ctf object format
		i_enum += 1; idx_cter_ac           = i_enum # amplitude contrast [%]; index must be same as ctf object format
		i_enum += 1; idx_cter_astig_amp    = i_enum # astigmatism amplitude [um]; index must be same as ctf object format
		i_enum += 1; idx_cter_astig_ang    = i_enum # astigmatism angle [degree]; index must be same as ctf object format
		i_enum += 1; idx_cter_sd_def       = i_enum # std dev of defocus [um]
		i_enum += 1; idx_cter_sd_astig_amp = i_enum # std dev of ast amp [A]
		i_enum += 1; idx_cter_sd_astig_ang = i_enum # std dev of ast angle [degree]
		i_enum += 1; idx_cter_cv_def       = i_enum # coefficient of variation of defocus [%]
		i_enum += 1; idx_cter_cv_astig_amp = i_enum # coefficient of variation of ast amp [%]
		i_enum += 1; idx_cter_spectra_diff = i_enum # average of differences between with- and without-astig. experimental 1D spectra at extrema
		i_enum += 1; idx_cter_error_def    = i_enum # frequency at which signal drops by 50% due to estimated error of defocus alone [1/A]
		i_enum += 1; idx_cter_error_astig  = i_enum # frequency at which signal drops by 50% due to estimated error of defocus and astigmatism [1/A]
		i_enum += 1; idx_cter_error_ctf    = i_enum # limit frequency by CTF error [1/A]
		i_enum += 1; idx_cter_mic_name     = i_enum # micrograph name
		i_enum += 1; n_idx_cter            = i_enum
	
	
	# Prepare loop variables
	mic_basename_pattern = os.path.basename(mic_pattern)              # file pattern without path
	mic_baseroot_pattern = os.path.splitext(mic_basename_pattern)[0]  # file pattern without path and extension
	coords_format = options.coordinates_format.lower()
	box_size = options.box_size
	box_half = box_size // 2
	mask2d = model_circle(box_size//2, box_size, box_size) # Create circular 2D mask to Util.infomask of particle images
	resample_ratio = options.resample_ratio
	
	n_mic_process = 0
	n_mic_reject_no_coords = 0
	n_mic_reject_no_cter_entry = 0
	n_global_coords_detect = 0
	n_global_coords_process = 0
	n_global_coords_reject_out_of_boundary = 0
	
	serial_id_list = []
	error_status = None
	## not a real while, an if with the opportunity to use break when errors need to be reported
	while myid == main_node:
		# 
		# NOTE: 2016/05/24 Toshio Moriya
		# Now, ignores the path in mic_pattern and entries of mic_name_list to create serial ID
		# Only the basename (file name) in micrograph path must be match
		# 
		# Create list of micrograph serial ID
		# Break micrograph name pattern into prefix and suffix to find the head index of the micrograph serial id
		# 
		mic_basename_tokens = mic_basename_pattern.split('*')
		# assert (len(mic_basename_tokens) == 2)
		serial_id_head_index = len(mic_basename_tokens[0])
		# Loop through micrograph names
		for mic_name in mic_name_list:
			# Find the tail index of the serial id and extract serial id from the micrograph name
			mic_basename = os.path.basename(mic_name)
			serial_id_tail_index = mic_basename.index(mic_basename_tokens[1])
			serial_id = mic_basename[serial_id_head_index:serial_id_tail_index]
			serial_id_list.append(serial_id)
		# assert (len(serial_id_list) == len(mic_name))
		del mic_name_list # Do not need this anymore
		
		# Load CTFs if necessary
		if options.import_ctf:
			
			ctf_list = read_text_row(options.import_ctf)
			# print("Detected CTF entries : %6d ..." % (len(ctf_list)))
			
			if len(ctf_list) == 0:
				error_status = ("No CTF entry is found in %s. Please check --import_ctf option. Run %s -h for help." % (options.import_ctf, progname), getframeinfo(currentframe()))
				break
			
			if (len(ctf_list[0]) != n_idx_cter):
				error_status = ("Number of columns (%d) must be %d in %s. The format might be old. Please run sxcter.py again." % (len(ctf_list[0]), n_idx_cter, options.import_ctf), getframeinfo(currentframe()))
				break
			
			ctf_dict={}
			n_reject_defocus_error = 0
			ctf_error_limit = [options.defocus_error/100.0, options.astigmatism_error]
			for ctf_params in ctf_list:
				assert(len(ctf_params) == n_idx_cter)
				# mic_baseroot is name of micrograph minus the path and extension
				mic_baseroot = os.path.splitext(os.path.basename(ctf_params[idx_cter_mic_name]))[0]
				if(ctf_params[idx_cter_sd_def] / ctf_params[idx_cter_def] > ctf_error_limit[0]):
					print("Defocus error %f exceeds the threshold. Micrograph %s is rejected." % (ctf_params[idx_cter_sd_def] / ctf_params[idx_cter_def], mic_baseroot))
					n_reject_defocus_error += 1
				else:
					if(ctf_params[idx_cter_sd_astig_ang] > ctf_error_limit[1]):
						ctf_params[idx_cter_astig_amp] = 0.0
						ctf_params[idx_cter_astig_ang] = 0.0
					ctf_dict[mic_baseroot] = ctf_params
			del ctf_list # Do not need this anymore
		
		break
		
	if_error_then_all_processes_exit_program(error_status)

	if options.import_ctf:
		if options.limit_ctf:
			cutoff_histogram = []  #@ming compute the histogram for micrographs cut of by ctf_params limit.
	
##################################################################################################################################################################################################################	
##################################################################################################################################################################################################################	
##################################################################################################################################################################################################################	
	
	restricted_serial_id_list = []
	if myid == main_node:
		# Loop over serial IDs of micrographs
		for serial_id in serial_id_list:
			# mic_baseroot is name of micrograph minus the path and extension
			mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
			mic_name = mic_pattern.replace("*", serial_id)
			coords_name = coords_pattern.replace("*", serial_id)
			
			########### # CHECKS: BEGIN
			if coords_name not in coords_name_list:
				print("    Cannot read %s. Skipping %s ..." % (coords_name, mic_baseroot))
				n_mic_reject_no_coords += 1
				continue
			
			# IF mic is in CTER results
			if options.import_ctf:
				if mic_baseroot not in ctf_dict:
					print("    Is not listed in CTER results. Skipping %s ..." % (mic_baseroot))
					n_mic_reject_no_cter_entry += 1
					continue
				else:
					ctf_params = ctf_dict[mic_baseroot]
			# CHECKS: END
			
			n_mic_process += 1
			
			restricted_serial_id_list.append(serial_id)
		# restricted_serial_id_list = restricted_serial_id_list[:128]  ## for testing against the nonMPI version

	
	if myid != main_node:
		if options.import_ctf:
			ctf_dict = None

	error_status = None
	if len(restricted_serial_id_list) < number_of_processes:
		error_status = ('Number of processes (%d) supplied by --np in mpirun cannot be greater than %d (number of micrographs that satisfy all criteria to be processed) ' % (number_of_processes, len(restricted_serial_id_list)), getframeinfo(currentframe()))
	if_error_then_all_processes_exit_program(error_status)

	## keep a copy of the original output directory where the final bdb will be created
	original_out_dir = out_dir
	if RUNNING_UNDER_MPI:
		mpi_barrier(MPI_COMM_WORLD)
		restricted_serial_id_list = wrap_mpi_bcast(restricted_serial_id_list, main_node)
		mic_start, mic_end = MPI_start_end(len(restricted_serial_id_list), number_of_processes, myid)
		restricted_serial_id_list_not_sliced = restricted_serial_id_list
		restricted_serial_id_list = restricted_serial_id_list[mic_start:mic_end]
	
		if options.import_ctf:
			ctf_dict = wrap_mpi_bcast(ctf_dict, main_node)

		# generate subdirectories of out_dir, one for each process
		out_dir = os.path.join(out_dir,"%03d"%myid)
	
	if myid == main_node:
		print("Micrographs processed by main process (including percent complete):")

	len_processed_by_main_node_divided_by_100 = len(restricted_serial_id_list)/100.0

##################################################################################################################################################################################################################	
##################################################################################################################################################################################################################	
##################################################################################################################################################################################################################	
#####  Starting main parallel execution

	for my_idx, serial_id in enumerate(restricted_serial_id_list):
		mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
		mic_name = mic_pattern.replace("*", serial_id)
		coords_name = coords_pattern.replace("*", serial_id)

		if myid == main_node:
			print(mic_name, " ---> % 2.2f%%"%(my_idx/len_processed_by_main_node_divided_by_100))
		mic_img = get_im(mic_name)

		# Read coordinates according to the specified format and 
		# make the coordinates the center of particle image 
		if coords_format == "sparx":
			coords_list = read_text_row(coords_name)
		elif coords_format == "eman1":
			coords_list = read_text_row(coords_name)
			for i in xrange(len(coords_list)):
				coords_list[i] = [(coords_list[i][0] + coords_list[i][2] // 2), (coords_list[i][1] + coords_list[i][3] // 2)]
		elif coords_format == "eman2":
			coords_list = js_open_dict(coords_name)["boxes"]
			for i in xrange(len(coords_list)):
				coords_list[i] = [coords_list[i][0], coords_list[i][1]]
		elif coords_format == "spider":
			coords_list = read_text_row(coords_name)
			for i in xrange(len(coords_list)):
				coords_list[i] = [coords_list[i][2], coords_list[i][3]]
			# else: assert (False) # Unreachable code
		
		# Calculate the new pixel size
		if options.import_ctf:
			ctf_params = ctf_dict[mic_baseroot]
			pixel_size_origin = ctf_params[idx_cter_apix]
			
			if resample_ratio < 1.0:
				# assert (resample_ratio > 0.0)
				new_pixel_size = pixel_size_origin / resample_ratio
				print("Resample micrograph to pixel size %6.4f and window segments from resampled micrograph." % new_pixel_size)
			else:
				# assert (resample_ratio == 1.0)
				new_pixel_size = pixel_size_origin
		
			# Set ctf along with new pixel size in resampled micrograph
			ctf_params[idx_cter_apix] = new_pixel_size
		else:
			# assert (not options.import_ctf)
			if resample_ratio < 1.0:
				# assert (resample_ratio > 0.0)
				print("Resample micrograph with ratio %6.4f and window segments from resampled micrograph." % resample_ratio)
			# else:
			#	assert (resample_ratio == 1.0)
		
		# Apply filters to micrograph
		fftip(mic_img)
		if options.limit_ctf:
			# assert (options.import_ctf)
			# Cut off frequency components higher than CTF limit 
			q1, q2 = ctflimit(box_size, ctf_params[idx_cter_def], ctf_params[idx_cter_cs], ctf_params[idx_cter_vol], new_pixel_size)
			
			# This is absolute frequency of CTF limit in scale of original micrograph
			if resample_ratio < 1.0:
				# assert (resample_ratio > 0.0)
				q1 = resample_ratio * q1 / float(box_size) # q1 = (pixel_size_origin / new_pixel_size) * q1/float(box_size)
			else:
				# assert (resample_ratio == 1.0) -> pixel_size_origin == new_pixel_size -> pixel_size_origin / new_pixel_size == 1.0
				q1 = q1 / float(box_size)
			
			if q1 < 0.5:
				mic_img = filt_tanl(mic_img, q1, 0.01)
				cutoff_histogram.append(q1)
		
		# Cut off frequency components lower than the box size can express 
		mic_img = fft(filt_gaussh(mic_img, resample_ratio / box_size))
		
		# Resample micrograph, map coordinates, and window segments from resampled micrograph using new coordinates
		# after resampling by resample_ratio, new pixel size will be pixel_size/resample_ratio = new_pixel_size
		# NOTE: 2015/04/13 Toshio Moriya
		# resample() efficiently takes care of the case resample_ratio = 1.0 but
		# it does not set apix_*. Even though it sets apix_* when resample_ratio < 1.0 ...
		mic_img = resample(mic_img, resample_ratio)
		
		if options.invert:
			mic_stats = Util.infomask(mic_img, None, True) # mic_stat[0:mean, 1:SD, 2:min, 3:max]
			Util.mul_scalar(mic_img, -1.0)
			mic_img += 2 * mic_stats[0]
		
		if options.import_ctf:
			from utilities import generate_ctf
			ctf_obj = generate_ctf(ctf_params) # indexes 0 to 7 (idx_cter_def to idx_cter_astig_ang) must be same in cter format & ctf object format.
		
		# Prepare loop variables
		nx = mic_img.get_xsize() 
		ny = mic_img.get_ysize()
		x0 = nx//2
		y0 = ny//2

		n_coords_reject_out_of_boundary = 0
		local_stack_name  = "bdb:%s#" % out_dir + mic_baseroot + '_ptcls'
		local_particle_id = 0 # can be different from coordinates_id
		# Loop over coordinates
		for coords_id in xrange(len(coords_list)):
			
			x = int(coords_list[coords_id][0])
			y = int(coords_list[coords_id][1])
			
			if resample_ratio < 1.0:
				# assert (resample_ratio > 0.0)
				x = int(x * resample_ratio)	
				y = int(y * resample_ratio)
			# else:
			# 	assert(resample_ratio == 1.0)
				
			if( (0 <= x - box_half) and ( x + box_half <= nx ) and (0 <= y - box_half) and ( y + box_half <= ny ) ):
				particle_img = Util.window(mic_img, box_size, box_size, 1, x-x0, y-y0)
			else:
				print("In %s, coordinates ID = %04d (x = %4d, y = %4d, box_size = %4d) is out of micrograph bound, skipping ..." % (mic_baseroot, coords_id, x, y, box_size))
				n_coords_reject_out_of_boundary += 1
				continue
			
			particle_img = ramp(particle_img)
			particle_stats = Util.infomask(particle_img, mask2d, False) # particle_stats[0:mean, 1:SD, 2:min, 3:max]
			particle_img -= particle_stats[0]
			particle_img /= particle_stats[1]
			
			# NOTE: 2015/04/09 Toshio Moriya
			# ptcl_source_image might be redundant information ...
			# Consider re-organizing header entries...
			particle_img.set_attr("ptcl_source_image", mic_name)
			particle_img.set_attr("ptcl_source_coord_id", coords_id)
			particle_img.set_attr("ptcl_source_coord", [int(coords_list[coords_id][0]), int(coords_list[coords_id][1])])
			particle_img.set_attr("resample_ratio", resample_ratio)
			
			# NOTE: 2015/04/13 Toshio Moriya
			# apix_* attributes are updated by resample() only when resample_ratio != 1.0
			# Let's make sure header info is consistent by setting apix_* = 1.0 
			# regardless of options, so it is not passed down the processing line
			particle_img.set_attr("apix_x", 1.0)
			particle_img.set_attr("apix_y", 1.0)
			particle_img.set_attr("apix_z", 1.0)
			if options.import_ctf:
				particle_img.set_attr("ctf",ctf_obj)
				particle_img.set_attr("ctf_applied", 0)
				particle_img.set_attr("pixel_size_origin", pixel_size_origin)
				# particle_img.set_attr("apix_x", new_pixel_size)
				# particle_img.set_attr("apix_y", new_pixel_size)
				# particle_img.set_attr("apix_z", new_pixel_size)
			# NOTE: 2015/04/13 Toshio Moriya 
			# Pawel Comment: Micrograph is not supposed to have CTF header info.
			# So, let's assume it does not exist & ignore its presence.
			# Note that resample() "correctly" updates pixel size of CTF header info if it exists
			# elif (particle_img.has_ctff()):
			# 	assert(not options.import_ctf)
			# 	ctf_origin = particle_img.get_attr("ctf_obj")
			# 	pixel_size_origin = round(ctf_origin.apix, 5) # Because SXCTER ouputs up to 5 digits 
			# 	particle_img.set_attr("apix_x",pixel_size_origin)
			# 	particle_img.set_attr("apix_y",pixel_size_origin)
			# 	particle_img.set_attr("apix_z",pixel_size_origin)	
			
			# print("local_stack_name, local_particle_id", local_stack_name, local_particle_id)
			particle_img.write_image(local_stack_name, local_particle_id)
			local_particle_id += 1
		
		n_global_coords_detect += len(coords_list)
		n_global_coords_process += local_particle_id
		n_global_coords_reject_out_of_boundary += n_coords_reject_out_of_boundary
		
#		# MRK_DEBUG: Toshio Moriya 2016/05/03
#		# Following codes are for debugging bdb. Delete in future
#		result = db_check_dict(local_stack_name)
#		print('# MRK_DEBUG: result = db_check_dict(local_stack_name): %s' % (result))
#		result = db_list_dicts('bdb:%s' % out_dir)
#		print('# MRK_DEBUG: result = db_list_dicts(out_dir): %s' % (result))
#		result = db_get_image_info(local_stack_name)
#		print('# MRK_DEBUG: result = db_get_image_info(local_stack_name)', result)
		
		# Release the data base of local stack from this process
		# so that the subprocess can access to the data base
		db_close_dict(local_stack_name)
		
#		# MRK_DEBUG: Toshio Moriya 2016/05/03
#		# Following codes are for debugging bdb. Delete in future
#		cmd_line = "e2iminfo.py %s" % (local_stack_name)
#		print('# MRK_DEBUG: Executing the command: %s' % (cmd_line))
#		cmdexecute(cmd_line)
		
#		# MRK_DEBUG: Toshio Moriya 2016/05/03
#		# Following codes are for debugging bdb. Delete in future
#		cmd_line = "e2iminfo.py bdb:%s#data" % (out_dir)
#		print('# MRK_DEBUG: Executing the command: %s' % (cmd_line))
#		cmdexecute(cmd_line)
		
	if RUNNING_UNDER_MPI:
		if options.import_ctf:
			if options.limit_ctf:
				cutoff_histogram = wrap_mpi_gatherv(cutoff_histogram, main_node)

	if myid == main_node:
		if options.limit_ctf:
			# Print out the summary of CTF-limit filtering
			print(" ")
			print("Global summary of CTF-limit filtering (--limit_ctf) ...")
			print("Percentage of filtered micrographs: %8.2f\n" % (len(cutoff_histogram) * 100.0 / len(restricted_serial_id_list_not_sliced)))

			n_bins = 10
			if len(cutoff_histogram) >= n_bins:
				from statistics import hist_list
				cutoff_region, cutoff_counts = hist_list(cutoff_histogram, n_bins)
				print("      Histogram of cut-off frequency")
				print("      cut-off       counts")
				for bin_id in xrange(n_bins):
					print(" %14.7f     %7d" % (cutoff_region[bin_id], cutoff_counts[bin_id]))
			else:
				print("The number of filtered micrographs (%d) is less than the number of bins (%d). No histogram is produced." % (len(cutoff_histogram), n_bins))
	
	n_mic_process = mpi_reduce(n_mic_process, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
	n_mic_reject_no_coords = mpi_reduce(n_mic_reject_no_coords, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
	n_mic_reject_no_cter_entry = mpi_reduce(n_mic_reject_no_cter_entry, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
	n_global_coords_detect = mpi_reduce(n_global_coords_detect, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
	n_global_coords_process = mpi_reduce(n_global_coords_process, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
	n_global_coords_reject_out_of_boundary = mpi_reduce(n_global_coords_reject_out_of_boundary, 1, MPI_INT, MPI_SUM, main_node, MPI_COMM_WORLD)
	
	# Print out the summary of all micrographs
	if main_node == myid:
		print(" ")
		print("Global summary of micrographs ...")
		print("Detected                        : %6d" % (len(restricted_serial_id_list_not_sliced)))
		print("Processed                       : %6d" % (n_mic_process))
		print("Rejected by no coordinates file : %6d" % (n_mic_reject_no_coords))
		print("Rejected by no CTER entry       : %6d" % (n_mic_reject_no_cter_entry))
		print(" ")
		print("Global summary of coordinates ...")
		print("Detected                        : %6d" % (n_global_coords_detect))
		print("Processed                       : %6d" % (n_global_coords_process))
		print("Rejected by out of boundary     : %6d" % (n_global_coords_reject_out_of_boundary))
		# print(" ")
		# print("DONE!!!")
	
	mpi_barrier(MPI_COMM_WORLD)
	
	if main_node == myid:
	
		import time
		time.sleep(1)
		print("\n Creating bdb:%s/data\n"%original_out_dir)
		for proc_i in range(number_of_processes):
			mic_start, mic_end = MPI_start_end(len(restricted_serial_id_list_not_sliced), number_of_processes, proc_i)
			for serial_id in restricted_serial_id_list_not_sliced[mic_start:mic_end]:
				e2bdb_command = "e2bdb.py "
				mic_baseroot = mic_baseroot_pattern.replace("*", serial_id)
				if RUNNING_UNDER_MPI:
					e2bdb_command += "bdb:" + os.path.join(original_out_dir,"%03d/"%proc_i) + mic_baseroot + "_ptcls "
				else:
					e2bdb_command += "bdb:" + os.path.join(original_out_dir, mic_baseroot + "_ptcls ") 
				
				e2bdb_command += " --appendvstack=bdb:%s/data  1>/dev/null"%original_out_dir
				cmdexecute(e2bdb_command, printing_on_success = False)
				
		print("Done!\n")
				
	if RUNNING_UNDER_MPI:
		mpi_barrier(MPI_COMM_WORLD)
		from mpi import mpi_finalize
		mpi_finalize()

	sys.stdout.flush()
	sys.exit(0)