Esempio n. 1
0
def main():
	"""Run subtilt refinement starting from a previous spt or subtilt refinement.

	Driven entirely by command-line options. In order, this function:
	  1. picks the iteration of the previous refinement (--path/--iter) to start from;
	  2. unless --resume is given, copies that iteration's maps, FSC and
	     alignment files into a new output folder as iteration 00;
	  3. merges selected parameters of the previous run into 0_subtlt_params.json;
	  4. when starting from a spt refinement, builds the even/odd particle lists,
	     either from the tilt images referenced by each 3D particle or, with
	     --reproject, from reprojections written by do_reproject();
	  5. runs --niters rounds of e2spt_tiltrefine_oneiter.py (per half set)
	     followed by e2refine_postprocess.py.

	Returns None. Problems with the input are reported with print() and an
	early return.
	"""
	
	usage="""prog --path <path to previous spt or subtlt refinement> [options]
	This program will run subtilt refinement based on previous subtomogram or subtilt refinement."""
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)

	#parser.add_header(name="orblock0", help='Just a visual separation', title="Inputs", row=0, col=1, rowspan=1, colspan=3, mode="model")


	parser.add_argument("--path", type=str,help="Path to the previous spt/subtlt refinement", default=None, guitype='filebox',  browser="EMBrowserWidget(withmodal=True,multiselect=False)",row=1, col=0,rowspan=1, colspan=2)
	parser.add_argument("--iter", type=int,help="Start from iteration X of previous refinement", default=-1, guitype='intbox',row=1, col=2,rowspan=1, colspan=1)
	
	
	#####################
	parser.add_header(name="orblock1", help='Just a visual separation', title="Options", row=2, col=1, rowspan=1, colspan=3, mode="model")

	
	
	parser.add_argument("--niters", type=int,help="Run this many iterations. Default is 4.", default=4, guitype='intbox',row=3, col=0,rowspan=1, colspan=1)

	parser.add_argument("--keep", type=float,help="propotion of tilts to keep. default is 0.5", default=0.5, guitype='floatbox',row=3, col=1,rowspan=1, colspan=1)

	parser.add_argument("--maxalt", type=float,help="max altitude to insert to volume", default=90.0, guitype='floatbox',row=3, col=2,rowspan=1, colspan=1)	
	
	parser.add_argument("--mask", type=str, default="Auto" ,help="Refinement and reprojection masking.",guitype='strbox',row=4, col=0,rowspan=1, colspan=2)	
	
	parser.add_argument("--nogs", action="store_true", default=False ,help="Skip gold standard. This is not a great idea...", guitype='boolbox',row=4, col=2,rowspan=1, colspan=1)
	

	parser.add_argument("--threads", type=int,help="Number of CPU threads to use. Default is 12.", default=12, guitype='intbox',row=5, col=2,rowspan=1, colspan=1)
	parser.add_argument("--parallel", type=str,help="Thread/mpi parallelism to use. Default is thread:12", default="thread:12", guitype='strbox',row=5, col=0,rowspan=1, colspan=2)

	parser.add_argument("--buildsetonly", action="store_true", default=False ,help="will only prepare particle set for the refinement but skip the actual refinement process.",guitype='boolbox',row=6, col=0,rowspan=1, colspan=1)
	parser.add_argument("--resume", action="store_true", default=False ,help="continue from previous run",guitype='boolbox',row=6, col=1,rowspan=1, colspan=1)
	
	parser.add_argument("--reproject", action="store_true", default=False ,help="Reproject 3D particles into 2D particles.",guitype='boolbox',row=7, col=0,rowspan=1, colspan=1)
	
	parser.add_argument("--reprj_offset", type=str, default="" ,help="Offset translation before reprojection")
	parser.add_argument("--reprj_clip", type=int, default=-1 ,help="clip after reprojection")

	parser.add_argument("--tophat", type=str, default="auto" ,help="Filter option for refine_postprocess. auto: same as spt refinement; local; global;")
	parser.add_argument("--refineastep", type=float,help="Mean angular variation for refine alignment", default=2.)
	parser.add_argument("--refinentry", type=int,help="number of starting points for refine alignment", default=16)
	parser.add_argument("--maxshift", type=int,help="maximum shift allowed", default=8)


	parser.add_argument("--padby", type=float,default=1.5, help="pad by factor. default is 1.5")
	parser.add_argument("--output", type=str,help="Write results to this directory. We do not recommend changing this.", default="subtlt")#, guitype='intbox',row=2, col=1,rowspan=1, colspan=1)
	parser.add_argument("--debug", action="store_true", default=False ,help="Turn on debug mode. This will only process a small subset of the data (threads * 8 particles)")
	parser.add_argument("--localnorm",action="store_true",help="local normalization. do not use yet....",default=False)
	# NOTE(review): the default "c1" is non-empty, so the symmetry inherited from the
	# previous refinement is always overwritten below, which contradicts the help text.
	# Confirm whether the default should be "" before changing it.
	parser.add_argument("--sym", type=str,help="symmetry. will use symmetry from spt refinement by default", default="c1")
	parser.add_argument("--ppid", type=int,help="ppid...", default=-1)

	(options, args) = parser.parse_args()
	# Hand the parent PID on to the logger; --ppid was parsed but never used otherwise.
	logid=E2init(sys.argv, options.ppid)
	

	itr = options.iter

	oldpath = options.path	
	if not oldpath:
		print("No input path. Exit.")
		return
	

	#### Work out which iteration of the previous refinement to start from
	if options.iter != -1:
		itr = options.iter
	elif "spt" in oldpath:
		# last iteration that has both a particle_parms_XX.json and a threed_XX.hdf
		for f in sorted(os.listdir(oldpath)):
			if "particle_parms" in f:
				itrstr = f[15:].split(".")[0]
				if os.path.isfile(os.path.join(oldpath,"threed_{}.hdf".format(itrstr))):
					itr = int(itrstr)
	else:
		# last threed_XX.hdf present in the folder
		for f in sorted(os.listdir(oldpath)):
			if re.match(r"threed_[0-9][0-9]\.hdf",f):
				itr = int(f[7:].split(".")[0])

	
	if not os.path.isfile(os.path.join(oldpath,"threed_{:02d}.hdf".format(itr))):
		print("Could not locate {}/threed_{:02d}.hdf".format(oldpath,itr))
		print("Please specify the iteration number (--iter) of a completed subtomogram refinement.")
		return
	
	
	# A folder holding 0_subtlt_params.json was produced by a subtilt refinement
	if "0_subtlt_params.json" in os.listdir(oldpath):
		print("Continuing from a subtilt refinement...")
		fromspt=False
	else:
		print("Start from a spt refinement...")
		fromspt=True
		
	if options.resume:
		if fromspt:
			print("Cannot resume from a spt refinement...")
			return
		# resuming writes into the existing folder and keeps its iteration numbering
		path=oldpath
		e=EMData(os.path.join(path,"threed_{:02d}.hdf".format(itr)))
	else:
		path = make_path(options.output)
		print("Writing in {}...".format(path))
	
		oldmap = os.path.join(oldpath,"threed_{:02d}.hdf".format(itr))
		oem = os.path.join(oldpath,"threed_{:02d}_even.hdf".format(itr))
		oom = os.path.join(oldpath,"threed_{:02d}_odd.hdf".format(itr))
		oldfsc = os.path.join(oldpath, "fsc_masked_{:02d}.txt".format(itr))

		# the selected iteration becomes iteration 00 of the new refinement
		copy2(oldmap,os.path.join(path,"threed_00.hdf"))
		copy2(oldfsc, os.path.join(path, "fsc_masked_00.txt"))
		copy2(oem,os.path.join(path,"threed_00_even.hdf"))
		copy2(oom,os.path.join(path,"threed_00_odd.hdf"))
		
		if fromspt:
			oldparm = os.path.join(oldpath,"particle_parms_{:02d}.json".format(itr))
			copy2(oldparm,os.path.join(path,"particle_parms_00.json"))
		else:
			for eo in ["even", "odd"]:
				oali = os.path.join(oldpath,"ali_ptcls_{:02d}_{}.lst".format(itr, eo))
				copy2(oali,os.path.join(path,"ali_ptcls_00_{}.lst".format(eo)))


		e=EMData(os.path.join(path,"threed_00.hdf"))
	
	bxsz=e["nx"]
	apix=e["apix_x"]

	#### Record the parameters of this run, then merge in those of the previous one
	jd = js_open_dict(os.path.join(path, "0_subtlt_params.json"))
	jd.update(vars(options))
	jd["cmd"] = " ".join(sys.argv)
	jd["path"] = oldpath
	jd["iter"] = itr
	jd["output"] = path
	
	# fraction of 3D particles to keep; replaced by "pkeep" of the spt run if available
	options.ptclkeep=1.0

	if fromspt:
		sptparms = os.path.join(oldpath,"0_spt_params.json")
	else:
		sptparms = os.path.join(oldpath,"0_subtlt_params.json")
		
	if os.path.isfile(sptparms):
		oldjd = js_open_dict(sptparms)
		jd["mass"] = oldjd["mass"]
		jd["setsf"] = oldjd["setsf"]
		jd["sym"] = oldjd["sym"]
		jd["localfilter"]=oldjd["localfilter"]
		jd["mask"]=oldjd["mask"]
		if oldjd.has_key("radref"):
			jd["radref"]=oldjd["radref"]
			
		if fromspt:
			options.ptclkeep=oldjd["pkeep"]
			
		oldjd.close()
	else:
		print("Cannot find {}. exit.".format(sptparms))
		return
	
	# --mask none keeps the inherited mask, "auto" clears it, anything else replaces it
	if options.mask.lower()!="none":
		if options.mask.lower()=="auto":
			jd["mask"]=""
		else:
			jd["mask"]=options.mask
	
	if len(options.sym)>0:
		jd["sym"]=options.sym
		
	# From here on jd is the plain dict held by the JSON wrapper, not the wrapper itself
	jsparams=jd.data
	jd.close()
	jd = jsparams

	#### Build particle lists from the tilt images referenced by each 3D particle
	if fromspt and not options.reproject:
		js=js_open_dict(os.path.join(path,"particle_parms_00.json"))
		# NOTE(review): the keys are evaluated with eval(); they come from a file written
		# by the previous refinement. Do not point this program at untrusted folders.
		k=list(js.keys())[0]
		src=eval(k)[0]
		
		print("loading 3D particles from {}".format(base_name(src)))
		print("box size {}, apix {:.2f}".format(bxsz, apix))

		lname=[os.path.join(path, "ali_ptcls_00_{}.lst".format(eo)) for eo in ["even", "odd"]]
		for l in lname:
			# remove lists left over from a previous run; a missing file is fine
			try: os.remove(l)
			except OSError: pass
		
		lst=[LSXFile(m, False) for m in lname]
		if options.ptclkeep<1.0:
			score=[float(js[k]["score"]) for k in list(js.keys())]
			simthr=np.sort(score)[int(len(score)*options.ptclkeep)]
			print("removing bad particles...")
			
		else:
			simthr=10000
			
		for ky in js.keys():
			
			dic=js[ky]
			# particles scoring above the threshold are dropped before any image I/O
			scr=float(dic["score"])
			if scr>simthr:
				continue
			xali=dic["xform.align3d"]

			src,ii=eval(ky)
			e=EMData(src, ii, True)
			fname=e["class_ptcl_src"]
			ids=e["class_ptcl_idxs"]
			
			# half-set assignment: from the source name if it says so, else by index parity
			if "__even" in src:
				eo=0
			elif "__odd" in src:
				eo=1
			else:
				eo=ii%2
			
			for i in ids:
				try:
					m=EMData(fname, i, True)
				except Exception:
					# unreadable tilt image: skip it rather than abort the whole set
					continue
				xf=m["xform.projection"]
				dc=xf.get_params("xyz")
				if abs(dc["ytilt"])>options.maxalt:
					continue
				rot=xf*xali.inverse()
				lst[eo].write(-1, i, fname, str(rot.get_params("eman")))
		for l in lst:
			l.close()
		js.close()

	#### Build particle lists by reprojecting the 3D particles
	if fromspt and options.reproject:
		print("Reprojecting 3D particles...")
		
		if options.reprj_offset!="":
			options.reprj_offset=[int(i) for i in options.reprj_offset.split(',')]
			print("Offset by {} before reprojection".format(options.reprj_offset))
		else:
			options.reprj_offset=None
		
		if options.mask.lower()=="auto":
			# soften the mask of the previous refinement and rescale it by (m+0.5)/1.5
			mask=EMData(os.path.join(oldpath,"mask.hdf"))
			mask.process_inplace("filter.lowpass.gauss", {"cutoff_abs":.1})
			mask.add(0.5)
			mask.div(1.5)
			mask.write_image(os.path.join(path,"mask_reproj.hdf"))
			
		elif options.mask.lower()=="none":
			mask=None
			
		else:
			mask=EMData(options.mask)
			
		
		ptclfile=[os.path.join(path,"particles_reproj_{}.hdf".format(eo)) for eo in ["even","odd"]]
		lstfile=[os.path.join(path, "ali_ptcls_00_{}.lst".format(eo)) for eo in ["even", "odd"]]
		for p in ptclfile+lstfile:
			# remove output left over from a previous run; a missing file is fine
			try: os.remove(p)
			except OSError: pass


		js=js_open_dict(os.path.join(path,"particle_parms_00.json"))
		jsdata=dict(js.data)
		js.close()
			
		if options.ptclkeep<1.0:
			score=[float(jsdata[k]["score"]) for k in list(jsdata.keys())]
			simthr=np.sort(score)[int(len(score)*options.ptclkeep)]
			print("removing bad particles with score > {:.2f}...".format(simthr))
			
		else:
			simthr=10000
			
		keys=list(jsdata.keys())
		nt=max(1, options.threads)
		if options.debug:
			keys=keys[:nt*32]
		# each worker gets every nt-th key
		thrds=[threading.Thread(target=do_reproject,
				args=(jsdata, keys[i::nt], ptclfile, options, mask, simthr)) for i in range(nt)]

		print(len(thrds)," threads")
		# There are exactly nt workers, so start them all and wait. Polling
		# threading.active_count() against --threads never launches anything with
		# --threads 1, because the main thread alone already counts as one.
		for t in thrds:
			t.start()
		for t in thrds:
			t.join()

		print("Now writing list files...")
		#### write a list file
		for k in [0,1]:
		
			n=EMUtil.get_image_count(ptclfile[k])
			lst=LSXFile(lstfile[k], False)
			for i in range(n):
				e=EMData(ptclfile[k], i, True)
				rot=e["xform.align3d"]
				lst.write(-1, i, ptclfile[k], str(rot.get_params("eman")))
			
			lst.close()
			
			
			
	if options.buildsetonly: return


	def fsc_resolution(fscfile):
		"""Return 1/frequency of the first FSC point below 0.3 in fscfile, or 10 if there is none."""
		fsc=np.loadtxt(fscfile)
		try:
			return 1./fsc[fsc[:,1]<0.3, 0][0]
		except IndexError:
			return 10

	
	if options.resume:
		starti=itr
	else:
		starti=0
	
	#### Refinement iterations
	for itr in range(starti,options.niters+starti):

		for eo in ["even", "odd"]:
			
			# without gold standard both half sets are refined against the combined map
			if options.nogs:
				threedname=os.path.join(path, "threed_{:02d}.hdf".format(itr))
			else:
				threedname=os.path.join(path, "threed_{:02d}_{}.hdf".format(itr, eo))
			
			lstname=os.path.join(path, "ali_ptcls_{:02d}_{}.lst".format(itr, eo))
			lname=os.path.join(path, "ali_ptcls_{:02d}_{}.lst".format(itr+1, eo))
			threedout=os.path.join(path, "threed_{:02d}_{}.hdf".format(itr+1, eo))
			
			cmd="e2spt_tiltrefine_oneiter.py --ptclin {} --ptclout {} --ref {} --threedout {} --keep {} --threads {} --parallel {} --refineastep {} --refinentry {} --maxshift {} --padby {} --sym {}".format(lstname, lname, threedname, threedout,  options.keep, options.threads, options.parallel, options.refineastep, options.refinentry, options.maxshift, options.padby, jd["sym"])
			if options.debug: 
				cmd+=" --debug"
				
			run(cmd)
			
			# keep an unprocessed copy of the half map
			run("e2proc3d.py {} {}".format(threedout, os.path.join(path, "threed_raw_{}.hdf".format(eo))))

		#### assemble the extra options for e2refine_postprocess.py
		# jd is a plain dict at this point, so membership is tested with "in"
		s = ""
		
		if "goldstandard" in jd: 
			if jd["goldstandard"] > 0: 
				s += " --align"
		if "setsf" in jd:
			s += " --setsf {}".format(jd['setsf'])
		
		
		if options.tophat=="auto" and "localfilter" in jd and jd["localfilter"]==True:
			s += " --tophat local"
		elif options.tophat=="local":
			s += " --tophat local"
		elif options.tophat=="global":
			s += " --tophat global"
			
		# the mask is either a file on disk or a processor specification string
		msk = jd["mask"]
		if len(msk)>0:
			if os.path.isfile(msk):
				msk=" --automask3d mask.fromfile:filename={}".format(msk)
			else:
				msk=" --automask3d {}".format(msk)
				
		s+=msk

		# target resolution is half the FSC<0.3 resolution of the previous iteration
		rs=fsc_resolution(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr)))
		curres=rs*.5
		
		even=os.path.join(path, "threed_{:02d}_even.hdf".format(itr+1))
		odd=os.path.join(path, "threed_{:02d}_odd.hdf".format(itr+1))


		ppcmd="e2refine_postprocess.py --even {} --odd {} --output {} --iter {:d} --restarget {} --threads {} --sym {} --mass {} {}".format(even, odd, 
			os.path.join(path, "threed_{:02d}.hdf".format(itr+1)), itr+1, curres, options.threads, jd["sym"], jd["mass"], s)
		run(ppcmd)
		
		if options.localnorm:
			for f in [even, odd]:
				run("e2proc3d.py {} {} --process normalize --process normalize.local:threshold=1:radius=16".format(f,f))
				
			run("e2proc3d.py {} {} --addfile {} --process normalize".format(
				even, os.path.join(path, "threed_{:02d}.hdf".format(itr+1)), odd))

		# report the resolution of the iteration that was just completed
		rs=fsc_resolution(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr+1)))
		print("Resolution (FSC<0.3) is ~{:.1f} A".format(rs))
				
	E2end(logid)
Esempio n. 2
0
def main():
	"""Run subtilt refinement starting from a previous spt or subtilt refinement.

	Driven entirely by command-line options. In order, this function:
	  1. picks the iteration of the previous refinement (--path/--iter) to start from;
	  2. copies that iteration's maps, FSC and alignment files into a new
	     output folder as iteration 00;
	  3. merges selected parameters of the previous run into 0_subtlt_params.json;
	  4. when starting from a spt refinement, builds the even/odd particle lists
	     from the tilt images referenced by each 3D particle;
	  5. for --niters rounds: aligns every listed particle through SptTltRefineTask
	     jobs, converts the scores to weights, reconstructs each half map with
	     e2make3dpar.py and runs e2refine_postprocess.py.

	Returns None. Exits with status 1 if the starting map cannot be found.
	"""
	
	usage=" "
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)

	parser.add_argument("--path", type=str,help="path", default=None, guitype='strbox',row=0, col=0,rowspan=1, colspan=1)
	parser.add_argument("--iter", type=int,help="start from iteration X", default=-1, guitype='intbox',row=0, col=1,rowspan=1, colspan=1)
	parser.add_argument("--niters", type=int,help="run this many iterations. Default is 4.", default=4, guitype='intbox',row=0, col=2,rowspan=1, colspan=1)

	parser.add_argument("--sym", type=str,help="symmetry. will use symmetry from spt refinement by default", default="", guitype='strbox',row=2, col=0,rowspan=1, colspan=1)
	parser.add_argument("--padby", type=float,help="pad by factor. default is 2", default=2., guitype='floatbox',row=1, col=1,rowspan=1, colspan=1)
	parser.add_argument("--keep", type=float,help="propotion of tilts to keep. default is 0.5", default=0.5, guitype='floatbox',row=1, col=2,rowspan=1, colspan=1)

	parser.add_argument("--maxalt", type=float,help="max altitude to insert to volume", default=90.0, guitype='floatbox',row=1, col=0,rowspan=1, colspan=1)	
	parser.add_argument("--nogs", action="store_true", default=False ,help="skip gold standard...", guitype='boolbox',row=2, col=1,rowspan=1, colspan=1)
	parser.add_argument("--localfilter", type=int, default=-1 ,help="use tophat local. specify 0 or 1 to overwrite the setting in the spt refinement")
	parser.add_argument("--mask", type=str, default="None" ,help="Refinement masking. default is the same as the spt refinement. Leave this empty for automasking",guitype='strbox',row=3, col=0,rowspan=1, colspan=2)	

	parser.add_argument("--threads", type=int,help="Number of CPU threads to use. Default is 12.", default=12, guitype='intbox',row=2, col=2,rowspan=1, colspan=1)
	parser.add_argument("--parallel", type=str,help="Thread/mpi parallelism to use. Default is thread:12", default="thread:12", guitype='strbox',row=4, col=0,rowspan=1, colspan=3)

	parser.add_argument("--refineastep", type=float,help="angular variation for refine alignment (gauss std)", default=8.)
	parser.add_argument("--refinentry", type=int,help="number of starting points for refine alignment", default=32)
	parser.add_argument("--maxshift", type=int,help="maximum shift allowed", default=10)

	parser.add_argument("--buildsetonly", action="store_true", default=False ,help="build sets only")
	parser.add_argument("--output", type=str,help="Write results to this directory. We do not recommend changing this.", default="subtlt")#, guitype='intbox',row=2, col=1,rowspan=1, colspan=1)

	parser.add_argument("--debug", action="store_true", default=False ,help="Turn on debug mode. This will only process a small subset of the data (threads * 8 particles)")
	parser.add_argument("--ppid", type=int,help="ppid...", default=-1)

	(options, args) = parser.parse_args()
	# Hand the parent PID on to the logger; --ppid was parsed but never used otherwise.
	logid=E2init(sys.argv, options.ppid)

	itr = options.iter

	oldpath = options.path	
	if not oldpath:
		print("No input path. Exit.")
		return
	

	#### Work out which iteration of the previous refinement to start from
	if options.iter != -1:
		itr = options.iter
	elif "spt" in oldpath:
		# last iteration that has both a particle_parms_XX.json and a threed_XX.hdf
		for f in sorted(os.listdir(oldpath)):
			if "particle_parms" in f:
				itrstr = f[15:].split(".")[0]
				if os.path.isfile("{}/threed_{}.hdf".format(oldpath,itrstr)):
					itr = int(itrstr)
	else:
		# last threed_XX.hdf present in the folder
		for f in sorted(os.listdir(oldpath)):
			if re.match(r"threed_[0-9][0-9]\.hdf",f):
				itr = int(f[7:].split(".")[0])

	# A folder holding 0_subtlt_params.json was produced by a subtilt refinement
	fromspt=True
	if "0_subtlt_params.json" in os.listdir(oldpath):
		print("Continuing from a subtilt refinement...")
		fromspt=False
		
	
	# Validate the input before creating the output folder, so a bad --path/--iter
	# does not leave an empty folder behind.
	if not os.path.isfile("{}/threed_{:02d}.hdf".format(oldpath,itr)):
		print("Could not locate {}/threed_{:02d}.hdf".format(oldpath,itr))
		print("Please specify the iteration number (--iter) of a completed subtomogram refinement.")
		sys.exit(1)

	path = make_path(options.output)

	oldmap = os.path.join(oldpath,"threed_{:02d}.hdf".format(itr))
	oem = os.path.join(oldpath,"threed_{:02d}_even.hdf".format(itr))
	oom = os.path.join(oldpath,"threed_{:02d}_odd.hdf".format(itr))
	oldfsc = os.path.join(oldpath, "fsc_masked_{:02d}.txt".format(itr))

	# the selected iteration becomes iteration 00 of the new refinement
	copy2(oldmap,os.path.join(path,"threed_00.hdf"))
	copy2(oldfsc, os.path.join(path, "fsc_masked_00.txt"))
	copy2(oem,os.path.join(path,"threed_00_even.hdf"))
	copy2(oom,os.path.join(path,"threed_00_odd.hdf"))
	
	if fromspt:
		oldparm = os.path.join(oldpath,"particle_parms_{:02d}.json".format(itr))
		copy2(oldparm,os.path.join(path,"particle_parms_00.json"))
	else:
		for eo in ["even", "odd"]:
			oali = os.path.join(oldpath,"ali_ptcls_{:02d}_{}.lst".format(itr, eo))
			copy2(oali,os.path.join(path,"ali_ptcls_00_{}.lst".format(eo)))


	e=EMData(os.path.join(path,"threed_00.hdf"))
	
	bxsz=e["nx"]
	apix=e["apix_x"]

	#### Record the parameters of this run, then merge in those of the previous one
	jd = js_open_dict("{}/0_subtlt_params.json".format(path))
	jd.update(vars(options))
	jd["cmd"] = " ".join(sys.argv)
	jd["path"] = oldpath
	jd["iter"] = itr
	jd["output"] = path

	if fromspt:
		sptparms = os.path.join(oldpath,"0_spt_params.json")
	else:
		sptparms = os.path.join(oldpath,"0_subtlt_params.json")
	if os.path.isfile(sptparms):
		oldjd = js_open_dict(sptparms)
		jd["mass"] = oldjd["mass"]
		jd["setsf"] = oldjd["setsf"]
		jd["sym"] = oldjd["sym"]
		jd["localfilter"]=oldjd["localfilter"]
		jd["mask"]=oldjd["mask"]
		oldjd.close()
	else:
		# "mass" is only available from the previous run, so the post-processing
		# command cannot be built without this file: stop here as the message says.
		print("Cannot find {}. exit.".format(sptparms))
		jd.close()
		return
	
	if options.mask.lower()!="none":
		print("Overwritting masking")
		jd["mask"]=options.mask
	
	# --localfilter 0/1 overrides the inherited setting; -1 (default) keeps it
	if options.localfilter==0:
		jd["localfilter"]=False
	elif options.localfilter==1:
		jd["localfilter"]=True
		
	if len(options.sym)>0:
		jd["sym"]=options.sym
		
	# From here on jd is the plain dict held by the JSON wrapper, not the wrapper itself
	jsparams=jd.data
	jd.close()
	jd = jsparams

	#### Build particle lists from the tilt images referenced by each 3D particle
	if fromspt:
		js=js_open_dict(os.path.join(path,"particle_parms_00.json"))
		# NOTE(review): the key is evaluated with eval(); it comes from a file written
		# by the previous refinement. Do not point this program at untrusted folders.
		k=list(js.keys())[0]
		src=eval(k)[0]
		
		print("loading 3D particles from {}".format(src))
		print("box size {}, apix {:.2f}".format(bxsz, apix))

		lname=[os.path.join(path, "ali_ptcls_00_{}.lst".format(eo)) for eo in ["even", "odd"]]
		for l in lname:
			# remove lists left over from a previous run; a missing file is fine
			try: os.remove(l)
			except OSError: pass
		
		lst=[LSXFile(m, False) for m in lname]
		n3d=len(list(js.keys()))
		# assumes every key has the form "('<src>', <index>)" with indices 0..n3d-1
		for ii in range(n3d):
			e=EMData(src, ii, True)
			fname=e["class_ptcl_src"]
			ids=e["class_ptcl_idxs"]
			ky="('{}', {})".format(src, ii)
			dic=js[ky]
			xali=dic["xform.align3d"]
			for i in ids:
				try:
					m=EMData(fname, i, True)
				except Exception:
					# unreadable tilt image: skip it rather than abort the whole set
					continue
				xf=m["xform.projection"]
				dc=xf.get_params("xyz")
				if abs(dc["ytilt"])>options.maxalt:
					continue
				rot=xf*xali.inverse()
				# even/odd split by 3D particle index
				lst[ii%2].write(-1, i, fname, str(rot.get_params("eman")))
		for l in lst:
			l.close()
		js.close()

	if options.buildsetonly: return


	def fsc_resolution(fscfile):
		"""Return 1/frequency of the first FSC point below 0.3 in fscfile, or 10 if there is none."""
		fsc=np.loadtxt(fscfile)
		try:
			return 1./fsc[fsc[:,1]<0.3, 0][0]
		except IndexError:
			return 10


	# imported here rather than at file level, but only once instead of per iteration
	from EMAN2PAR import EMTaskCustomer

	#### Refinement iterations
	for itr in range(0,options.niters):

		for eo in ["even", "odd"]:
			
			# without gold standard both half sets are refined against the combined map
			if options.nogs:
				threedname="{}/threed_{:02d}.hdf".format(path, itr)
			else:
				threedname="{}/threed_{:02d}_{}.hdf".format(path, itr, eo)
			
			lstname="{}/ali_ptcls_{:02d}_{}.lst".format(path, itr, eo)
			lst=LSXFile(lstname, True)
			m=EMData(threedname)
			
			m.process_inplace('normalize.edgemean')
			
			# debug mode works on a small subset, never more than the list holds
			if options.debug: nptcl=min(options.threads*8, lst.n)
			else: nptcl=lst.n
			pinfo=[lst.read(i) for i in range(nptcl)]
			lst=None
			
			etc=EMTaskCustomer(options.parallel)
			num_cpus = etc.cpu_est()
			
			print("{} total CPUs available".format(num_cpus))
			print("{} jobs".format(nptcl))
			
			# distribute the particles round-robin, one task per CPU
			infos=[[] for i in range(num_cpus)]
			for i,info in enumerate(pinfo):
				infos[i%num_cpus].append([i, info])
			
			tids=[]
			for info in infos:
				task = SptTltRefineTask(info, m, options)
				tid=etc.send_task(task)
				tids.append(tid)
			
			# poll until every task reports 100
			while 1:
				st_vals = etc.check_task(tids)
				if np.min(st_vals) == 100: break
				time.sleep(5)
			
			# collect the per-particle results back into list order via their "idx"
			dics=[0]*nptcl
			for i in tids:
				ret=etc.get_results(i)[1]
				for r in ret:
					ii=r.pop("idx")
					dics[ii]=r
			
			del etc
			
			#### turn alignment scores into weights
			allscr=np.array([d["score"] for d in dics])
			print(np.min(allscr), np.mean(allscr), np.max(allscr), np.std(allscr))
			allscr*=-1
			# z-score of the negated scores; beyond +-clp sigma the weight is 0,
			# inside that range it maps linearly onto 0..1
			z=allscr.copy()
			z-=np.mean(z)
			z/=np.std(z)
			clp=2
			ol=abs(z)>clp
			print("Removing {} outliers from {} particles..".format(np.sum(ol), len(z)))
			z=old_div(old_div((z+clp),clp),2)
			z[ol]=0
			allscr=z

			lname="{}/ali_ptcls_{:02d}_{}.lst".format(path, itr+1, eo)
			try: os.remove(lname)
			except OSError: pass
			lout=LSXFile(lname, False)
			for i, dc in enumerate(dics):
				d=dc["xform.align3d"].get_params("eman")
				d["score"]=float(allscr[i])
				l=pinfo[i]
				lout.write(-1, l[0], l[1], str(d))

			lout=None

			pb=options.padby
			threedout="{}/threed_{:02d}_{}.hdf".format(path, itr+1, eo)
			cmd="e2make3dpar.py --input {inp} --output {out} --pad {pd} --padvol {pdv} --threads {trd} --outsize {bx} --apix {apx} --mode gauss_var --keep {kp} --sym {sm}".format(
				inp=lname, 
				out=threedout,
				bx=bxsz, pd=int(bxsz*pb), pdv=int(bxsz*pb), apx=apix, kp=options.keep, sm=jd["sym"], trd=options.threads)
			
			run(cmd)
			# keep an unprocessed copy of the half map
			run("e2proc3d.py {} {}".format(threedout, "{}/threed_raw_{}.hdf".format(path, eo)))

		#### assemble the extra options for e2refine_postprocess.py
		# jd is a plain dict at this point, so membership is tested with "in"
		s = ""
		
		if "goldstandard" in jd: 
			if jd["goldstandard"] > 0: 
				s += " --align"
		if "setsf" in jd:
			s += " --setsf {}".format(jd['setsf'])
		# the key is always present, so the stored value decides, not its existence
		if jd.get("localfilter", False)==True:
			s += " --tophat local"
		# the mask is either a file on disk or a processor specification string
		msk = jd["mask"]
		if len(msk)>0:
			if os.path.isfile(msk):
				msk=" --automask3d mask.fromfile:filename={}".format(msk)
			else:
				msk=" --automask3d {}".format(msk)
		# without this the mask option never reaches the post-processing command
		s+=msk

		# target resolution is half the FSC<0.3 resolution of the previous iteration
		rs=fsc_resolution(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr)))
		curres=rs*.5

		ppcmd="e2refine_postprocess.py --even {} --odd {} --output {} --iter {:d} --restarget {} --threads {} --sym {} --mass {} {}".format(
			os.path.join(path, "threed_{:02d}_even.hdf".format(itr+1)), 
			os.path.join(path, "threed_{:02d}_odd.hdf".format(itr+1)), 
			os.path.join(path, "threed_{:02d}.hdf".format(itr+1)), itr+1, curres, options.threads, jd["sym"], jd["mass"], s)
		run(ppcmd)

		# report the resolution of the iteration that was just completed
		rs=fsc_resolution(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr+1)))
		print("Resolution (FSC<0.3) is ~{:.1f} A".format(rs))
				
	E2end(logid)
Esempio n. 3
0
def main():
	"""Split the particles of a spt refinement into classes by PCA and k-means.

	Driven entirely by command-line options. In order, this function:
	  1. loads threed_XX.hdf, a mask and particle_parms_XX.json from --path/--iter;
	  2. aligns, masks and optionally shrinks every particle, takes its FFT,
	     crops Fourier space according to --maxres and zeroes the voxels whose
	     amplitude is below the mean amplitude of their radial shell
	     (presumably the missing wedge);
	  3. runs PCA with --nbasis components on the normalized differences from the
	     weighted average (twice with --clean, discarding outliers in between);
	  4. clusters the PCA coordinates into --nclass classes with k-means;
	  5. writes the PCA output, one particle list and one particle_parms json
	     per class into a new "sptcls" folder and runs e2spt_average.py on each.

	Returns None. Exits with status 1 on invalid input.
	"""
	usage = """Usage: e2spt_pcasplit.py --path <spt_XX> [options]"""

	parser = EMArgumentParser(usage=usage,version=EMANVERSION)
	# NOTE(review): --iter, --nclass and --nbasis are declared required=True although
	# their help texts describe defaults; confirm which of the two is intended.
	parser.add_argument("--path",type=str,required=True,default=None,help="Path to a folder where results should be stored, following standard naming conventions (default = spt_XX)",guitype='filebox',row=0, col=0, rowspan=1, colspan=2)
	parser.add_argument("--iter",type=int,required=True,help="Iteration number within path. Default is the second to last iteration (-2).",default=-2,guitype='intbox',row=1, col=0, rowspan=1, colspan=1)
	parser.add_argument("--nclass",type=int,required=True,help="Number of classes. Default is 2.",default=2,guitype="intbox",row=1, col=1, rowspan=1, colspan=1)
	parser.add_header(name="orblock1", help='', title="Optional", row=2, col=0, rowspan=1, colspan=2)
	parser.add_argument("--maxres",type=float,help="Filter particles to this resolution (in Angstroms) before classification",default=30.0,guitype="floatbox",row=3, col=0, rowspan=1, colspan=1)
	parser.add_argument("--sym",type=str,help="Apply this symmetry.",default="c1",guitype="strbox",row=3, col=1, rowspan=1, colspan=1)
	parser.add_argument("--mask",type=str,help="Apply this mask. Default is 'mask_tight.hdf' from <--path>_<--iter>. Specify 'none' for no masking",default="",guitype="filebox",row=4, col=0, rowspan=1, colspan=2)
	parser.add_argument("--nbasis",type=int,required=True,help="Number of PCA basis vectors. Default is 3.",default=3,guitype="intbox",row=5, col=0, rowspan=1, colspan=1)
	# parser.add_argument("--keepthresh",type=float,help="Center PCA outliers beyond this value before performing K-means clustering. Default is no threshold (-1).",default=,guitype="floatbox",row=5, col=0, rowspan=1, colspan=1)
	parser.add_argument("--nowedgefill",action='store_true',help="Do not fill the missing wedge before classification.",default=False,guitype="boolbox",row=5, col=1, rowspan=1, colspan=1)
	parser.add_argument("--clean",action='store_true',help="remove outliers before PCA.",default=False,guitype="boolbox",row=6, col=1, rowspan=1, colspan=1)
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")
	parser.add_argument("--shrink",type=int,help="Shrink particles before classification",default=1)
	parser.add_argument("--dotest",type=int,help="test using N random particles",default=-1)
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)
	(options, args) = parser.parse_args()

	if options.path is None:
		print("You must specify the path to an existing spt_XX refinement directory.")
		sys.exit(1)
	if not os.path.isdir(options.path):
		print("Could not locate --path {}".format(options.path))
		sys.exit(1)

	if options.iter==-2:
		# iteration numbers of all particle_parms_XX files; use the one before the last
		fls=[int(i[15:17]) for i in os.listdir(options.path) if i[:15]=="particle_parms_" and str.isdigit(i[15:17])]
		if len(fls)==0 : 
			print("No subtomogram alignment was complered for this refinement. Please try another")
			sys.exit(1)
		options.iter=max(fls)-1

	
	logid=E2init(sys.argv, options.ppid)

	threed = EMData("{}/threed_{:02d}.hdf".format(options.path,options.iter))
	 
	if options.mask=="none":
		# no masking: a volume of ones with the geometry of the map
		msk=threed.copy()
		msk.to_one()
	elif options.mask=="":
		msk = EMData("{}/mask_tight.hdf".format(options.path))
	else:
		msk = EMData(options.mask)

	parmsfile = "{}/particle_parms_{:02d}.json".format(options.path,options.iter)
	js=js_open_dict(parmsfile)

	nptcl=len(js.keys())
	irange=np.arange(nptcl, dtype=int)
	if options.dotest>0:
		np.random.shuffle(irange)
		nptcl=min(nptcl, options.dotest)
		irange=irange[:nptcl]
		print("Test with {} particles".format(nptcl))
	# The source file name is taken from the first key; keys look like "('<file>', <index>)".
	# NOTE(review): the key is evaluated with eval(); it comes from a file written by
	# the refinement. Do not point this program at untrusted folders.
	inptcls=pname=eval(list(js.keys())[0])[0]
	

	
	print("Preprocessing {} particles...".format(nptcl))
	
	#### doing everything in fourier space with numpy
	
	data=[]		# flattened complex Fourier volume per particle
	wgs=[]		# flattened boolean map of the zeroed voxels per particle
	keys=[]		# json key per processed particle
	pids=[]		# index in pname per processed particle
	
	### to clip fourier space based on resolution
	boxsz=threed["nx"]//options.shrink
	apix=threed["apix_x"]*options.shrink
	freq=np.fft.fftfreq(boxsz, apix)[:boxsz//2]
	clip=boxsz//2-np.argmax(freq>1./options.maxres)
	# argmax returns 0 when no frequency exceeds the limit: keep the full volume then
	if clip==boxsz//2: clip=0
	for i in irange.tolist():
		
		k="('{}', {})".format(pname, i)
		if not js.has_key(k): continue
		xf=js[k]['xform.align3d']
		e=EMData(pname, i)
		e.transform(xf)
		e.mult(msk)
		if options.shrink>1:
			e.process_inplace("math.meanshrink",{"n":options.shrink})
	
		en=e.numpy().copy()
		#### numpy fft is actually significantly slower than EMAN fft. so probably should change to EMAN if I can get the coordinates right..
		
		ef=np.fft.fftshift(np.fft.fftn(en))
		if clip>0:
			ef=ef[clip:-clip,clip:-clip,clip:-clip]
		if len(data)==0:
			# integer radius of every voxel of the cropped volume, computed once
			sz=len(ef)
			idx=np.indices((sz,sz,sz))-sz//2
			r=np.sqrt(np.sum(idx**2, 0))
			r = r.astype(int)	# np.int no longer exists in current NumPy
			nr = np.bincount(r.ravel())
			
		# mean amplitude per radial shell
		efa=abs(ef)
		tbin = np.bincount(r.ravel(), efa.ravel())
		sf = tbin / nr
		sf[sf==0]=1e-5
		# voxels weaker than their shell average (outside r<=1) are zeroed and flagged
		div=np.divide(efa,sf[r])
		wdg=np.logical_and(div<1., r>1)
		ef[wdg]=0
		data.append(ef.flatten())
		wgs.append(wdg.flatten())
		keys.append(k)
		pids.append(i)
		
		sys.stdout.write("\r   {}/{} particles".format(len(data),nptcl))
		sys.stdout.flush()
	print()

	js.close()

	if len(data)==0:
		print("No particle could be loaded from {}. Exit.".format(parmsfile))
		E2end(logid)
		sys.exit(1)

	data=np.array(data)
	wgs=np.array(wgs)
	print(data.shape)

	# average over the particles that actually contributed to each voxel (+1 avoids /0)
	avg=np.sum(data, axis=0)
	w=np.sum(1-np.array(wgs), axis=0)+1
	avg=avg/w
	dv=data-avg
	std=np.std(abs(dv))
	dv/=std
	# PCA works on real numbers: real and imaginary parts side by side
	imgsnp=np.hstack([dv.real, dv.imag])
	print(dv.real.shape, imgsnp.shape)
	
	options.outpath = make_path("sptcls")
	print("Output will be written to {}".format(options.outpath))

	print("Performing PCA...")
	# From here on nptcl is the number of particles actually processed; rows of
	# imgsnp, keys and pids all share this order.
	nptcl=len(imgsnp)
	ptclids=np.array(pids, dtype=int)[:, None]
	
	if options.clean:
		#### do pca twice to remove outliers	
		pca=PCA(options.nbasis)
		pout=pca.fit_transform(imgsnp)
		dst=np.linalg.norm(pout-np.mean(pout, 0), axis=1)
		# more than 2 sigma above the mean distance from the centroid
		outlr=dst>np.mean(dst)+np.std(dst)*2
		
		np.savetxt("{}/pca_rmoutlier.txt".format(options.outpath), 
			np.hstack([ptclids, pout]))
		print("Removing {} outliers...".format(np.sum(outlr)))
		
		imgsnp=imgsnp[~outlr]
		ptclids=ptclids[~outlr]
	
	
	pca=PCA(options.nbasis)
	pout=pca.fit_transform(imgsnp)
	np.savetxt("{}/pca_ptcls.txt".format(options.outpath), 
		np.hstack([ptclids, pout]))

	# write each basis vector as a real-space volume
	basisfile = "{}/pca_basis.hdf".format(options.outpath)
	l=len(data[0])
	for i,c in enumerate(pca.components_):
		# undo the real/imag stacking, then the fftshift, before the inverse FFT
		eg=c[:l]+c[l:]*1j
		egmap=eg.reshape((sz,sz,sz))
		o=np.real(np.fft.ifftn(np.fft.ifftshift(egmap)))
		m=from_numpy(o.copy())
		m.write_image(basisfile,i)

	print("Classifying particles...")
	kmeans = KMeans(n_clusters=options.nclass).fit(pout)
	lb=kmeans.labels_
	
	if options.clean:
		# expand the labels back to all processed particles; outliers get -1
		lbfull=np.zeros(nptcl, dtype=int)
		lbfull[~outlr]=lb
		lbfull[outlr]=-1
		lb=lbfull
		
	print("Class: Particle count")
	for i in range(kmeans.n_clusters):
		print("{}: {}".format(i,np.sum(lb==i)))

	
	#### one particle list per label; label -1 (outliers) ends up in ptcls_cls00.lst
	inlst=LSXFile(inptcls, True)
	for lbl in sorted(np.unique(lb)):
		outlst = LSXFile("{}/ptcls_cls{:02d}.lst".format(options.outpath, lbl+1))
		for i in range(nptcl):
			if lb[i]==lbl:
				# row i of the data is particle pids[i] of the input list; the two
				# differ with --dotest or when a key was missing
				l=inlst.read(pids[i])
				outlst.write(-1,l[0], l[1],l[2])
		outlst.close()
	inlst.close()
	
	#### one particle_parms json per class, then average each class
	js0=js_open_dict(parmsfile)

	dics=[{} for i in range(kmeans.n_clusters)]
	for i in range(nptcl):
		if lb[i]>=0:
			k=keys[i]
			dics[lb[i]][k]=js0[k]
			
	js0.close()

	for i,d in enumerate(dics):
		js=js_open_dict("{}/particle_parms_{:02d}.json".format(options.outpath, i+1))
		js.update(d)
		js.close()
		os.system("e2spt_average.py --path {} --iter {} --threads 10 --sym {} --skippostp --simthr 1".format(options.outpath, i+1, options.sym))

	E2end(logid)