def main():
    """Drive subtilt refinement seeded from a previous spt/subtlt refinement.

    Copies the selected iteration's maps/FSC/alignment into a fresh (or resumed)
    working directory, builds per-tilt 2D particle lists (either from 3D particle
    headers or by reprojection), then iterates e2spt_tiltrefine_oneiter.py +
    e2refine_postprocess.py for --niters rounds.  All inputs come from argv.
    """
    usage="""prog --path <path to previous spt or subtlt refinement> [options] This program will run subtilt refinement based on previous subtomogram or subtilt refinement."""
    parser = EMArgumentParser(usage=usage,version=EMANVERSION)
    #parser.add_header(name="orblock0", help='Just a visual separation', title="Inputs", row=0, col=1, rowspan=1, colspan=3, mode="model")
    parser.add_argument("--path", type=str,help="Path to the previous spt/subtlt refinement", default=None, guitype='filebox', browser="EMBrowserWidget(withmodal=True,multiselect=False)",row=1, col=0,rowspan=1, colspan=2)
    parser.add_argument("--iter", type=int,help="Start from iteration X of previous refinement", default=-1, guitype='intbox',row=1, col=2,rowspan=1, colspan=1)
    #####################
    parser.add_header(name="orblock1", help='Just a visual separation', title="Options", row=2, col=1, rowspan=1, colspan=3, mode="model")
    parser.add_argument("--niters", type=int,help="Run this many iterations. Default is 4.", default=4, guitype='intbox',row=3, col=0,rowspan=1, colspan=1)
    parser.add_argument("--keep", type=float,help="propotion of tilts to keep. default is 0.5", default=0.5, guitype='floatbox',row=3, col=1,rowspan=1, colspan=1)
    parser.add_argument("--maxalt", type=float,help="max altitude to insert to volume", default=90.0, guitype='floatbox',row=3, col=2,rowspan=1, colspan=1)
    parser.add_argument("--mask", type=str, default="Auto" ,help="Refinement and reprojection masking.",guitype='strbox',row=4, col=0,rowspan=1, colspan=2)
    parser.add_argument("--nogs", action="store_true", default=False ,help="Skip gold standard. This is not a great idea...", guitype='boolbox',row=4, col=2,rowspan=1, colspan=1)
    parser.add_argument("--threads", type=int,help="Number of CPU threads to use. Default is 12.", default=12, guitype='intbox',row=5, col=2,rowspan=1, colspan=1)
    parser.add_argument("--parallel", type=str,help="Thread/mpi parallelism to use. Default is thread:12", default="thread:12", guitype='strbox',row=5, col=0,rowspan=1, colspan=2)
    parser.add_argument("--buildsetonly", action="store_true", default=False ,help="will only prepare particle set for the refinement but skip the actual refinement process.",guitype='boolbox',row=6, col=0,rowspan=1, colspan=1)
    parser.add_argument("--resume", action="store_true", default=False ,help="continue from previous run",guitype='boolbox',row=6, col=1,rowspan=1, colspan=1)
    parser.add_argument("--reproject", action="store_true", default=False ,help="Reproject 3D particles into 2D particles.",guitype='boolbox',row=7, col=0,rowspan=1, colspan=1)
    parser.add_argument("--reprj_offset", type=str, default="" ,help="Offset translation before reprojection")
    parser.add_argument("--reprj_clip", type=int, default=-1 ,help="clip after reprojection")
    parser.add_argument("--tophat", type=str, default="auto" ,help="Filter option for refine_postprocess. auto: same as spt refinement; local; global;")
    parser.add_argument("--refineastep", type=float,help="Mean angular variation for refine alignment", default=2.)
    parser.add_argument("--refinentry", type=int,help="number of starting points for refine alignment", default=16)
    parser.add_argument("--maxshift", type=int,help="maximum shift allowed", default=8)
    parser.add_argument("--padby", type=float,default=1.5, help="pad by factor. default is 1.5")
    parser.add_argument("--output", type=str,help="Write results to this directory. We do not recommend changing this.", default="subtlt")#, guitype='intbox',row=2, col=1,rowspan=1, colspan=1)
    parser.add_argument("--debug", action="store_true", default=False ,help="Turn on debug mode. This will only process a small subset of the data (threads * 8 particles)")
    parser.add_argument("--localnorm",action="store_true",help="local normalization. do not use yet....",default=False)
    parser.add_argument("--sym", type=str,help="symmetry. will use symmetry from spt refinement by default", default="c1")
    parser.add_argument("--ppid", type=int,help="ppid...", default=-1)
    (options, args) = parser.parse_args()
    logid=E2init(sys.argv)

    itr = options.iter
    oldpath = options.path
    if not oldpath:
        print("No input path. Exit.")
        return

    # Locate the iteration to start from: explicit --iter wins; otherwise scan the
    # previous directory for the latest completed iteration.
    if options.iter != -1:
        itr = options.iter
    elif "spt" in oldpath:
        for f in sorted(os.listdir(oldpath)):
            if "particle_parms" in f:
                itrstr = f[15:].split(".")[0]
                # only accept iterations whose 3D map actually exists
                if os.path.isfile(os.path.join(oldpath,"threed_{}.hdf".format(itrstr))):
                    itr = int(itrstr)
    else:
        for f in sorted(os.listdir(oldpath)):
            if re.match("threed_[0-9][0-9].hdf",f):
                itr = int(f[7:].split(".")[0])

    if not os.path.isfile(os.path.join(oldpath,"threed_{:02d}.hdf".format(itr))):
        print("Could not locate {}/threed_{:02d}.hdf".format(oldpath,itr))
        print("Please specify the iteration number (--iter) of a completed subtomogram refinement.")
        return

    # A subtilt directory carries its own params file; an spt one does not.
    if "0_subtlt_params.json" in os.listdir(oldpath):
        print("Continuing from a subtilt refinement...")
        fromspt=False
    else:
        print("Start from a spt refinement...")
        fromspt=True

    if options.resume:
        if fromspt:
            print("Cannot resume from a spt refinement...")
            return
        path=oldpath
        e=EMData(os.path.join(path,"threed_{:02d}.hdf".format(itr)))
    else:
        # Fresh output directory; seed it with maps, FSC and alignment from the old run.
        path = make_path(options.output)
        print("Writing in {}...".format(path))
        oldmap = os.path.join(oldpath,"threed_{:02d}.hdf".format(itr))
        oem = os.path.join(oldpath,"threed_{:02d}_even.hdf".format(itr))
        oom = os.path.join(oldpath,"threed_{:02d}_odd.hdf".format(itr))
        oldfsc = os.path.join(oldpath, "fsc_masked_{:02d}.txt".format(itr))
        copy2(oldmap,os.path.join(path,"threed_00.hdf"))
        copy2(oldfsc, os.path.join(path, "fsc_masked_00.txt"))
        copy2(oem,os.path.join(path,"threed_00_even.hdf"))
        copy2(oom,os.path.join(path,"threed_00_odd.hdf"))
        if fromspt:
            oldparm = os.path.join(oldpath,"particle_parms_{:02d}.json".format(itr))
            copy2(oldparm,os.path.join(path,"particle_parms_00.json"))
        else:
            for eo in ["even", "odd"]:
                oali = os.path.join(oldpath,"ali_ptcls_{:02d}_{}.lst".format(itr, eo))
                copy2(oali,os.path.join(path,"ali_ptcls_00_{}.lst".format(eo)))
        e=EMData(os.path.join(path,"threed_00.hdf"))

    bxsz=e["nx"]
    apix=e["apix_x"]

    # Record run parameters for provenance / GUI resume.
    jd = js_open_dict(os.path.join(path, "0_subtlt_params.json"))
    jd.update(vars(options))
    jd["cmd"] = " ".join(sys.argv)
    jd["path"] = oldpath
    jd["iter"] = itr
    jd["output"] = path

    # Pull mass/setsf/sym/mask etc. from the previous run's params.
    options.ptclkeep=1.0
    if fromspt:
        sptparms = os.path.join(oldpath,"0_spt_params.json")
    else:
        sptparms = os.path.join(oldpath,"0_subtlt_params.json")
    if os.path.isfile(sptparms):
        oldjd = js_open_dict(sptparms)
        #print(oldjd.keys())
        jd["mass"] = oldjd["mass"]
        jd["setsf"] = oldjd["setsf"]
        jd["sym"] = oldjd["sym"]
        jd["localfilter"]=oldjd["localfilter"]
        jd["mask"]=oldjd["mask"]
        if oldjd.has_key("radref"):
            jd["radref"]=oldjd["radref"]
        if fromspt:
            options.ptclkeep=oldjd["pkeep"]
        oldjd.close()
    else:
        print("Cannot find {}. exit.".format(sptparms))
        return

    # Command-line mask overrides the inherited one ("auto" clears it).
    if options.mask.lower()!="none":
        #print("Overwritting masking")
        if options.mask.lower()=="auto":
            jd["mask"]=""
        else:
            jd["mask"]=options.mask
    #if options.localfilter==0:
    #	jd["localfilter"]=False
    #elif options.localfilter==1:
    #	jd["localfilter"]=True
    if len(options.sym)>0:
        jd["sym"]=options.sym

    jsparams=jd.data
    jd.close()
    jd = jsparams

    if fromspt and not options.reproject:
        # Build even/odd 2D tilt particle lists from 3D particle headers.
        js=js_open_dict(os.path.join(path,"particle_parms_00.json"))
        k=list(js.keys())[0]
        src=eval(k)[0]
        print("loading 3D particles from {}".format(base_name(src)))
        print("box size {}, apix {:.2f}".format(bxsz, apix))

        lname=[os.path.join(path, "ali_ptcls_00_{}.lst".format(eo)) for eo in ["even", "odd"]]
        for l in lname:
            try: os.remove(l)
            except: pass
        lst=[LSXFile(m, False) for m in lname]
        n3d=len(list(js.keys()))

        # Score threshold to discard the worst-aligned 3D particles.
        if options.ptclkeep<1.0:
            score=[]
            for k in list(js.keys()):
                score.append(float(js[k]["score"]))
            simthr=np.sort(score)[int(len(score)*options.ptclkeep)]
            print("removing bad particles...")
        else:
            simthr=10000

        for ky in js.keys():
            src,ii=eval(ky)
            e=EMData(src, ii, True)
            fname=e["class_ptcl_src"]
            ids=e["class_ptcl_idxs"]
            #ky="('{}', {})".format(src, ii)
            dic=js[ky]
            xali=dic["xform.align3d"]
            scr=float(dic["score"])
            if scr>simthr:
                continue
            # Even/odd split follows the source filename when tagged, else index parity.
            if "__even" in src:
                eo=0
            elif "__odd" in src:
                eo=1
            else:
                eo=ii%2
            #print(src, eo)
            for i in ids:
                try:
                    m=EMData(fname, i, True)
                except:
                    continue
                xf=m["xform.projection"]
                dc=xf.get_params("xyz")
                if abs(dc["ytilt"])>options.maxalt:
                    continue
                # Compose tilt projection with the inverse 3D alignment.
                rot=xf*xali.inverse()
                lst[eo].write(-1, i, fname, str(rot.get_params("eman")))
        for l in lst:
            l.close()
        js=None

    if fromspt and options.reproject:
        # Build 2D particles by reprojecting the aligned 3D particles (threaded).
        print("Reprojecting 3D particles...")
        if options.reprj_offset!="":
            options.reprj_offset=[int(i) for i in options.reprj_offset.split(',')]
            print("Offset by {} before reprojection".format(options.reprj_offset))
        else:
            options.reprj_offset=None

        if options.mask.lower()=="auto":
            # Soften the refinement mask so reprojections keep some background.
            mask=EMData(os.path.join(oldpath,"mask.hdf"))
            mask.process_inplace("filter.lowpass.gauss", {"cutoff_abs":.1})
            mask.add(0.5)
            mask.div(1.5)
            mask.write_image(os.path.join(path,"mask_reproj.hdf"))
        elif options.mask.lower()=="none":
            mask=None
        else:
            mask=EMData(options.mask)

        ptclfile=[os.path.join(path,"particles_reproj_{}.hdf".format(eo)) for eo in ["even","odd"]]
        for p in ptclfile:
            try: os.remove(p)
            except: pass
        lstfile=[os.path.join(path, "ali_ptcls_00_{}.lst".format(eo)) for eo in ["even", "odd"]]
        for p in lstfile:
            try: os.remove(p)
            except: pass

        js=js_open_dict(os.path.join(path,"particle_parms_00.json"))
        jsdata=dict(js.data)
        js.close()

        if options.ptclkeep<1.0:
            score=[]
            for k in list(jsdata.keys()):
                score.append(float(jsdata[k]["score"]))
            simthr=np.sort(score)[int(len(score)*options.ptclkeep)]
            print("removing bad particles with score > {:.2f}...".format(simthr))
        else:
            simthr=10000

        keys=list(jsdata.keys())
        nt=options.threads
        if options.debug:
            keys=keys[:nt*32]
        thrds=[threading.Thread(target=do_reproject, args=(jsdata, keys[i::nt], ptclfile, options, mask, simthr)) for i in range(nt)]
        print(len(thrds)," threads")
        thrtolaunch=0
        while thrtolaunch<len(thrds) or threading.active_count()>1:
            # If we haven't launched all threads yet, then we wait for an empty slot, and launch another
            # note that it's ok that we wait here forever, since there can't be new results if an existing
            # thread hasn't finished.
            if thrtolaunch<len(thrds):
                while (threading.active_count()==options.threads):
                    time.sleep(.1)
                #if options.verbose : print("Starting thread {}/{}".format(thrtolaunch,len(thrds)))
                thrds[thrtolaunch].start()
                thrtolaunch+=1
            else:
                time.sleep(1)
        for t in thrds:
            t.join()

        print("Now writing list files...")
        #### write a list file
        for k in [0,1]:
            n=EMUtil.get_image_count(ptclfile[k])
            lst=LSXFile(lstfile[k], False)
            for i in range(n):
                e=EMData(ptclfile[k], i, True)
                rot=e["xform.align3d"]
                lst.write(-1, i, ptclfile[k], str(rot.get_params("eman")))
            lst=None

    if options.buildsetonly:
        return

    if options.resume:
        starti=itr
    else:
        starti=0

    for itr in range(starti,options.niters+starti):
        for eo in ["even", "odd"]:
            # With --nogs both half-sets refine against the combined map.
            if options.nogs:
                threedname=os.path.join(path, "threed_{:02d}.hdf".format(itr))
            else:
                threedname=os.path.join(path, "threed_{:02d}_{}.hdf".format(itr, eo))
            lstname=os.path.join(path, "ali_ptcls_{:02d}_{}.lst".format(itr, eo))
            lname=os.path.join(path, "ali_ptcls_{:02d}_{}.lst".format(itr+1, eo))
            threedout=os.path.join(path, "threed_{:02d}_{}.hdf".format(itr+1, eo))
            cmd="e2spt_tiltrefine_oneiter.py --ptclin {} --ptclout {} --ref {} --threedout {} --keep {} --threads {} --parallel {} --refineastep {} --refinentry {} --maxshift {} --padby {} --sym {}".format(lstname, lname, threedname, threedout, options.keep, options.threads, options.parallel, options.refineastep, options.refinentry, options.maxshift, options.padby, jd["sym"])
            if options.debug:
                cmd+=" --debug"
            run(cmd)
            run("e2proc3d.py {} {}".format(threedout, os.path.join(path, "threed_raw_{}.hdf".format(eo))))

        # Assemble extra options for e2refine_postprocess.py.
        s = ""
        if jd.has_key("goldstandard"):
            if jd["goldstandard"] > 0:
                s += " --align"
        if jd.has_key("setsf"):
            s += " --setsf {}".format(jd['setsf']) #options.setsf)
        if options.tophat=="auto" and jd.has_key("localfilter") and jd["localfilter"]==True:
            s += " --tophat local"
        elif options.tophat=="local":
            s += " --tophat local"
        elif options.tophat=="global":
            s += " --tophat global"

        msk = jd["mask"] #{}/mask_tight.hdf".format(path)
        if len(msk)>0:
            if os.path.isfile(msk):
                msk=" --automask3d mask.fromfile:filename={}".format(msk)
            else:
                msk=" --automask3d {}".format(msk)
            s+=msk

        # get target resolution from last iteration map
        ref=os.path.join(path, "threed_{:02d}.hdf".format(itr))
        fsc=np.loadtxt(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr)))
        try:
            rs=1./fsc[fsc[:,1]<0.3, 0][0]
        except:
            rs=10  # curve never crossed 0.3; fall back to 10 A
        curres=rs*.5

        even=os.path.join(path, "threed_{:02d}_even.hdf".format(itr+1))
        odd=os.path.join(path, "threed_{:02d}_odd.hdf".format(itr+1))
        #os.system("rm {}/mask*.hdf {}/*unmasked.hdf".format(path, path))
        ppcmd="e2refine_postprocess.py --even {} --odd {} --output {} --iter {:d} --restarget {} --threads {} --sym {} --mass {} {}".format(even, odd, os.path.join(path, "threed_{:02d}.hdf".format(itr+1)), itr+1, curres, options.threads, jd["sym"], jd["mass"], s)
        run(ppcmd)

        if options.localnorm:
            for f in [even, odd]:
                run("e2proc3d.py {} {} --process normalize --process normalize.local:threshold=1:radius=16".format(f,f))
            run("e2proc3d.py {} {} --addfile {} --process normalize".format(even, os.path.join(path, "threed_{:02d}.hdf".format(itr+1)), odd))

        # FIX: recompute the resolution from the NEW iteration's FSC before
        # reporting; previously the stale value from iteration `itr` was printed.
        fsc=np.loadtxt(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr+1)))
        try:
            rs=1./fsc[fsc[:,1]<0.3, 0][0]
        except:
            rs=10
        print("Resolution (FSC<0.3) is ~{:.1f} A".format(rs))

    E2end(logid)
def main():
    """Legacy subtilt refinement driver.

    Seeds a new working directory from a previous spt/subtlt refinement, builds
    even/odd 2D tilt-particle lists from 3D particle headers, then for each
    iteration refines per-tilt alignments in-process via EMTaskCustomer tasks,
    reconstructs with e2make3dpar.py and postprocesses with
    e2refine_postprocess.py.  All inputs come from argv.
    """
    usage=" "
    parser = EMArgumentParser(usage=usage,version=EMANVERSION)
    parser.add_argument("--path", type=str,help="path", default=None, guitype='strbox',row=0, col=0,rowspan=1, colspan=1)
    parser.add_argument("--iter", type=int,help="start from iteration X", default=-1, guitype='intbox',row=0, col=1,rowspan=1, colspan=1)
    parser.add_argument("--niters", type=int,help="run this many iterations. Default is 4.", default=4, guitype='intbox',row=0, col=2,rowspan=1, colspan=1)
    parser.add_argument("--sym", type=str,help="symmetry. will use symmetry from spt refinement by default", default="", guitype='strbox',row=2, col=0,rowspan=1, colspan=1)
    parser.add_argument("--padby", type=float,help="pad by factor. default is 2", default=2., guitype='floatbox',row=1, col=1,rowspan=1, colspan=1)
    parser.add_argument("--keep", type=float,help="propotion of tilts to keep. default is 0.5", default=0.5, guitype='floatbox',row=1, col=2,rowspan=1, colspan=1)
    parser.add_argument("--maxalt", type=float,help="max altitude to insert to volume", default=90.0, guitype='floatbox',row=1, col=0,rowspan=1, colspan=1)
    parser.add_argument("--nogs", action="store_true", default=False ,help="skip gold standard...", guitype='boolbox',row=2, col=1,rowspan=1, colspan=1)
    parser.add_argument("--localfilter", type=int, default=-1 ,help="use tophat local. specify 0 or 1 to overwrite the setting in the spt refinement")
    parser.add_argument("--mask", type=str, default="None" ,help="Refinement masking. default is the same as the spt refinement. Leave this empty for automasking",guitype='strbox',row=3, col=0,rowspan=1, colspan=2)
    parser.add_argument("--threads", type=int,help="Number of CPU threads to use. Default is 12.", default=12, guitype='intbox',row=2, col=2,rowspan=1, colspan=1)
    parser.add_argument("--parallel", type=str,help="Thread/mpi parallelism to use. Default is thread:12", default="thread:12", guitype='strbox',row=4, col=0,rowspan=1, colspan=3)
    parser.add_argument("--refineastep", type=float,help="angular variation for refine alignment (gauss std)", default=8.)
    parser.add_argument("--refinentry", type=int,help="number of starting points for refine alignment", default=32)
    parser.add_argument("--maxshift", type=int,help="maximum shift allowed", default=10)
    parser.add_argument("--buildsetonly", action="store_true", default=False ,help="build sets only")
    parser.add_argument("--output", type=str,help="Write results to this directory. We do not recommend changing this.", default="subtlt")#, guitype='intbox',row=2, col=1,rowspan=1, colspan=1)
    parser.add_argument("--debug", action="store_true", default=False ,help="Turn on debug mode. This will only process a small subset of the data (threads * 8 particles)")
    parser.add_argument("--ppid", type=int,help="ppid...", default=-1)
    (options, args) = parser.parse_args()
    logid=E2init(sys.argv)

    itr = options.iter
    oldpath = options.path
    if not oldpath:
        print("No input path. Exit.")
        return

    # Locate the iteration to start from: explicit --iter wins; otherwise scan.
    if options.iter != -1:
        itr = options.iter
    elif "spt" in oldpath:
        for f in sorted(os.listdir(oldpath)):
            if "particle_parms" in f:
                itrstr = f[15:].split(".")[0]
                if os.path.isfile("{}/threed_{}.hdf".format(oldpath,itrstr)):
                    itr = int(itrstr)
    else:
        for f in sorted(os.listdir(oldpath)):
            if re.match("threed_[0-9][0-9].hdf",f):
                itr = int(f[7:].split(".")[0])
    # print(oldpath)

    fromspt=True
    if "0_subtlt_params.json" in os.listdir(oldpath):
        print("Continuing from a subtilt refinement...")
        fromspt=False

    path = make_path(options.output)

    if not os.path.isfile("{}/threed_{:02d}.hdf".format(oldpath,itr)):
        print("Could not locate {}/threed_{:02d}.hdf".format(oldpath,itr))
        print("Please specify the iteration number (--iter) of a completed subtomogram refinement.")
        sys.exit(1)
    #elif not os.path.isfile("{}/particle_parms_{:02d}.json".format(oldpath,itr)):
    #	print("Could not locate {}/particle_parms_{:02d}.json".format(oldpath,itr))
    #	print("Please specify the iteration number (--iter) of a completed subtomogram refinement.")
    #	sys.exit(1)
    else:
        # Seed the new directory with maps, FSC and alignment from the old run.
        #copy2("{}/0_spt_params.json".format(oldpath),"{}/0_subtlt_params.json".format(path))
        oldmap = os.path.join(oldpath,"threed_{:02d}.hdf".format(itr))
        oem = os.path.join(oldpath,"threed_{:02d}_even.hdf".format(itr))
        oom = os.path.join(oldpath,"threed_{:02d}_odd.hdf".format(itr))
        oldfsc = os.path.join(oldpath, "fsc_masked_{:02d}.txt".format(itr))
        copy2(oldmap,os.path.join(path,"threed_00.hdf"))
        copy2(oldfsc, os.path.join(path, "fsc_masked_00.txt"))
        copy2(oem,os.path.join(path,"threed_00_even.hdf"))
        copy2(oom,os.path.join(path,"threed_00_odd.hdf"))
        if fromspt:
            oldparm = os.path.join(oldpath,"particle_parms_{:02d}.json".format(itr))
            copy2(oldparm,os.path.join(path,"particle_parms_00.json"))
        else:
            for eo in ["even", "odd"]:
                oali = os.path.join(oldpath,"ali_ptcls_{:02d}_{}.lst".format(itr, eo))
                copy2(oali,os.path.join(path,"ali_ptcls_00_{}.lst".format(eo)))

    e=EMData(os.path.join(path,"threed_00.hdf"))
    bxsz=e["nx"]
    apix=e["apix_x"]

    # Record run parameters for provenance.
    jd = js_open_dict("{}/0_subtlt_params.json".format(path))
    jd.update(vars(options))
    jd["cmd"] = " ".join(sys.argv)
    jd["path"] = oldpath
    jd["iter"] = itr
    jd["output"] = path

    # Inherit mass/setsf/sym/mask from the previous run's params.
    if fromspt:
        sptparms = os.path.join(oldpath,"0_spt_params.json")
    else:
        sptparms = os.path.join(oldpath,"0_subtlt_params.json")
    if os.path.isfile(sptparms):
        oldjd = js_open_dict(sptparms)
        #print(oldjd.keys())
        jd["mass"] = oldjd["mass"]
        jd["setsf"] = oldjd["setsf"]
        jd["sym"] = oldjd["sym"]
        jd["localfilter"]=oldjd["localfilter"]
        jd["mask"]=oldjd["mask"]
        oldjd.close()
    else:
        print("Cannot find {}. exit.".format(sptparms))

    # Command-line overrides.
    if options.mask.lower()!="none":
        print("Overwritting masking")
        jd["mask"]=options.mask
    if options.localfilter==0:
        jd["localfilter"]=False
    elif options.localfilter==1:
        jd["localfilter"]=True
    if len(options.sym)>0:
        jd["sym"]=options.sym

    jsparams=jd.data
    jd.close()
    jd = jsparams

    if fromspt:
        # Build even/odd 2D tilt particle lists from 3D particle headers.
        js=js_open_dict(os.path.join(path,"particle_parms_00.json"))
        k=list(js.keys())[0]
        src=eval(k)[0]
        print("loading 3D particles from {}".format(src))
        print("box size {}, apix {:.2f}".format(bxsz, apix))

        lname=[os.path.join(path, "ali_ptcls_00_{}.lst".format(eo)) for eo in ["even", "odd"]]
        for l in lname:
            try: os.remove(l)
            except: pass
        lst=[LSXFile(m, False) for m in lname]
        n3d=len(list(js.keys()))
        for ii in range(n3d):
            e=EMData(src, ii, True)
            fname=e["class_ptcl_src"]
            ids=e["class_ptcl_idxs"]
            ky="('{}', {})".format(src, ii)
            dic=js[ky]
            xali=dic["xform.align3d"]
            for i in ids:
                try:
                    m=EMData(fname, i, True)
                except:
                    continue
                xf=m["xform.projection"]
                dc=xf.get_params("xyz")
                if abs(dc["ytilt"])>options.maxalt:
                    continue
                # Compose tilt projection with the inverse 3D alignment;
                # even/odd split by 3D particle index parity.
                rot=xf*xali.inverse()
                lst[ii%2].write(-1, i, fname, str(rot.get_params("eman")))
        for l in lst:
            l.close()
        js=None

    if options.buildsetonly:
        return

    for itr in range(0,options.niters):
        from EMAN2PAR import EMTaskCustomer
        for eo in ["even", "odd"]:
            if options.nogs:
                threedname="{}/threed_{:02d}.hdf".format(path, itr)
            else:
                threedname="{}/threed_{:02d}_{}.hdf".format(path, itr, eo)

            lstname="{}/ali_ptcls_{:02d}_{}.lst".format(path, itr, eo)
            lst=LSXFile(lstname, True)
            m=EMData(threedname)
            m.process_inplace('normalize.edgemean')

            pinfo=[]
            if options.debug:
                nptcl=options.threads*8
            else:
                nptcl=lst.n
            for i in range(nptcl):
                pinfo.append(lst.read(i))
            lst=None

            # Fan refinement tasks out across the parallel backend.
            etc=EMTaskCustomer(options.parallel)
            num_cpus = etc.cpu_est()
            print("{} total CPUs available".format(num_cpus))
            print("{} jobs".format(nptcl))

            infos=[[] for i in range(num_cpus)]
            for i,info in enumerate(pinfo):
                infos[i%num_cpus].append([i, info])

            tids=[]
            for info in infos:
                task = SptTltRefineTask(info, m, options)
                tid=etc.send_task(task)
                tids.append(tid)

            while 1:
                st_vals = etc.check_task(tids)
                #print("{:.1f}/{} finished".format(np.mean(st_vals), 100))
                #print(tids)
                if np.min(st_vals) == 100:
                    break
                time.sleep(5)

            dics=[0]*nptcl
            for i in tids:
                ret=etc.get_results(i)[1]
                for r in ret:
                    #print(r)
                    ii=r.pop("idx")
                    dics[ii]=r
            del etc

            # Normalize scores to [0,1]-ish weights, zeroing >2-sigma outliers.
            allscr=np.array([d["score"] for d in dics])
            print(np.min(allscr), np.mean(allscr), np.max(allscr), np.std(allscr))
            allscr*=-1
            s=allscr.copy()
            s-=np.mean(s)
            s/=np.std(s)
            clp=2
            ol=abs(s)>clp
            print("Removing {} outliers from {} particles..".format(np.sum(ol), len(s)))
            s=old_div(old_div((s+clp),clp),2)
            s[ol]=0
            allscr=s
            #allscr-=np.min(allscr)-1e-5
            #allscr/=np.max(allscr)

            lname="{}/ali_ptcls_{:02d}_{}.lst".format(path, itr+1, eo)
            try: os.remove(lname)
            except: pass
            lout=LSXFile(lname, False)
            for i, dc in enumerate(dics):
                d=dc["xform.align3d"].get_params("eman")
                d["score"]=float(allscr[i])
                l=pinfo[i]
                lout.write(-1, l[0], l[1], str(d))
            lout=None

            pb=options.padby
            threedout="{}/threed_{:02d}_{}.hdf".format(path, itr+1, eo)
            cmd="e2make3dpar.py --input {inp} --output {out} --pad {pd} --padvol {pdv} --threads {trd} --outsize {bx} --apix {apx} --mode gauss_var --keep {kp} --sym {sm}".format( inp=lname, out=threedout, bx=bxsz, pd=int(bxsz*pb), pdv=int(bxsz*pb), apx=apix, kp=options.keep, sm=jd["sym"], trd=options.threads)
            run(cmd)
            run("e2proc3d.py {} {}".format(threedout, "{}/threed_raw_{}.hdf".format(path, eo)))

        # Assemble extra options for e2refine_postprocess.py.
        s = ""
        if jd.has_key("goldstandard"):
            if jd["goldstandard"] > 0:
                s += " --align"
        if jd.has_key("setsf"):
            s += " --setsf {}".format(jd['setsf']) #options.setsf)
        if jd.has_key("localfilter"):
            s += " --tophat local"

        msk = jd["mask"] #{}/mask_tight.hdf".format(path)
        if len(msk)>0:
            if os.path.isfile(msk):
                msk=" --automask3d mask.fromfile:filename={}".format(msk)
            else:
                msk=" --automask3d {}".format(msk)
            # FIX: the mask option string was built but never appended, so
            # --automask3d was silently dropped from the postprocess command.
            s+=msk

        # get target resolution from last iteration map
        ref=os.path.join(path, "threed_{:02d}.hdf".format(itr))
        fsc=np.loadtxt(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr)))
        # FIX: guard the 0.3-crossing lookup; an FSC that never drops below 0.3
        # used to raise IndexError here.
        try:
            rs=1./fsc[fsc[:,1]<0.3, 0][0]
        except:
            rs=10
        curres=rs*.5

        #os.system("rm {}/mask*.hdf {}/*unmasked.hdf".format(path, path))
        ppcmd="e2refine_postprocess.py --even {} --odd {} --output {} --iter {:d} --restarget {} --threads {} --sym {} --mass {} {}".format( os.path.join(path, "threed_{:02d}_even.hdf".format(itr+1)), os.path.join(path, "threed_{:02d}_odd.hdf".format(itr+1)), os.path.join(path, "threed_{:02d}.hdf".format(itr+1)), itr+1, curres, options.threads, jd["sym"], jd["mass"], s)
        run(ppcmd)

        fsc=np.loadtxt(os.path.join(path, "fsc_masked_{:02d}.txt".format(itr+1)))
        try:
            rs=1./fsc[fsc[:,1]<0.3, 0][0]
        except:
            rs=10
        print("Resolution (FSC<0.3) is ~{:.1f} A".format(rs))

    E2end(logid)
def main():
    """Split an spt refinement's particles into classes via Fourier-space PCA.

    Loads aligned 3D particles from an spt_XX directory, masks and (optionally)
    shrinks them, moves to Fourier space with a per-particle missing-wedge
    weighting, runs PCA + K-means, then writes per-class .lst/.json files and
    launches e2spt_average.py for each class.  All inputs come from argv.
    """
    progname = os.path.basename(sys.argv[0])
    usage = """Usage: e2spt_pcasplit.py --path <spt_XX> [options]"""
    parser = EMArgumentParser(usage=usage,version=EMANVERSION)
    parser.add_argument("--path",type=str,required=True,default=None,help="Path to a folder where results should be stored, following standard naming conventions (default = spt_XX)",guitype='filebox',row=0, col=0, rowspan=1, colspan=2)
    parser.add_argument("--iter",type=int,required=True,help="Iteration number within path. Default is the second to last iteration (-2).",default=-2,guitype='intbox',row=1, col=0, rowspan=1, colspan=1)
    parser.add_argument("--nclass",type=int,required=True,help="Number of classes. Default is 2.",default=2,guitype="intbox",row=1, col=1, rowspan=1, colspan=1)
    parser.add_header(name="orblock1", help='', title="Optional", row=2, col=0, rowspan=1, colspan=2)
    parser.add_argument("--maxres",type=float,help="Filter particles to this resolution (in Angstroms) before classification",default=30.0,guitype="floatbox",row=3, col=0, rowspan=1, colspan=1)
    parser.add_argument("--sym",type=str,help="Apply this symmetry.",default="c1",guitype="strbox",row=3, col=1, rowspan=1, colspan=1)
    parser.add_argument("--mask",type=str,help="Apply this mask. Default is 'mask_tight.hdf' from <--path>_<--iter>. Specify 'none' for no masking",default="",guitype="filebox",row=4, col=0, rowspan=1, colspan=2)
    parser.add_argument("--nbasis",type=int,required=True,help="Number of PCA basis vectors. Default is 3.",default=3,guitype="intbox",row=5, col=0, rowspan=1, colspan=1)
    # parser.add_argument("--keepthresh",type=float,help="Center PCA outliers beyond this value before performing K-means clustering. Default is no threshold (-1).",default=-1,guitype="floatbox",row=5, col=0, rowspan=1, colspan=1)
    parser.add_argument("--nowedgefill",action='store_true',help="Do not fill the missing wedge before classification.",default=False,guitype="boolbox",row=5, col=1, rowspan=1, colspan=1)
    parser.add_argument("--clean",action='store_true',help="remove outliers before PCA.",default=False,guitype="boolbox",row=6, col=1, rowspan=1, colspan=1)
    parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")
    parser.add_argument("--shrink",type=int,help="Shrink particles before classification",default=1)
    parser.add_argument("--dotest",type=int,help="test using N random particles",default=-1)
    parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)
    (options, args) = parser.parse_args()

    if options.path == None:
        print("You must specify the path to an existing spt_XX refinement directory.")
        sys.exit(1)
    if not os.path.isdir(options.path):
        print("Could not locate --path {}".format(options.path))
        sys.exit(1)

    # -2 means "second to last": pick the latest particle_parms_NN and back off one.
    if options.iter==-2:
        fls=[int(i[15:17]) for i in os.listdir(options.path) if i[:15]=="particle_parms_" and str.isdigit(i[15:17])]
        if len(fls)==0:
            # FIX: typo "complered" -> "completed" in user-facing message
            print("No subtomogram alignment was completed for this refinement. Please try another")
            sys.exit(1)
        options.iter=max(fls)-1

    logid=E2init(sys.argv, options.ppid)

    threed = EMData("{}/threed_{:02d}.hdf".format(options.path,options.iter))
    if options.mask=="none":
        msk=threed.copy()
        msk.to_one()
    elif options.mask=="":
        msk = EMData("{}/mask_tight.hdf".format(options.path))
    else:
        msk = EMData(options.mask)

    parmsfile = "{}/particle_parms_{:02d}.json".format(options.path,options.iter)
    js=js_open_dict(parmsfile)
    nptcl=len(js.keys())
    irange=np.arange(nptcl, dtype=int)
    if options.dotest>0:
        # Quick test mode: classify a random subset only.
        np.random.shuffle(irange)
        nptcl=min(nptcl, options.dotest)
        irange=irange[:nptcl]
        print("Test with {} particles".format(nptcl))

    inptcls=pname=eval(js.keys()[0])[0]
    print("Preprocessing {} particles...".format(nptcl))

    #### doing everything in fourier space with numpy
    data=[]
    wgs=[]
    keys=[]

    ### to clip fourier space based on resolution
    sz=threed["nx"]//options.shrink
    apix=threed["apix_x"]*options.shrink
    freq=np.fft.fftfreq(sz, apix)[:sz//2]
    clip=sz//2-np.argmax(freq>1./options.maxres)
    if clip==sz//2:
        clip=0

    for i in irange.tolist():
        k="('{}', {})".format(pname, i)
        if js.has_key(k)==False:
            continue
        xf=js[k]['xform.align3d']
        e=EMData(pname, i)
        e.transform(xf)
        e.mult(msk)
        if options.shrink>1:
            e.process_inplace("math.meanshrink",{"n":options.shrink})
        en=e.numpy().copy()
        #### numpy fft is actually significantly slower than EMAN fft. so probably
        #### should change to EMAN if I can get the coordinates right..
        ef=np.fft.fftshift(np.fft.fftn(en))
        if clip>0:
            ef=ef[clip:-clip,clip:-clip,clip:-clip]
        if len(data)==0:
            # Radius lookup tables, built once from the first particle's size.
            sz=len(ef)
            idx=np.indices((sz,sz,sz))-sz//2
            r=np.sqrt(np.sum(idx**2, 0))
            # FIX: np.int alias removed in NumPy >= 1.24; use the builtin.
            r = r.astype(int)
            nr = np.bincount(r.ravel())
        # Flag missing-wedge voxels: amplitude below the rotational average.
        efa=abs(ef)
        tbin = np.bincount(r.ravel(), efa.ravel())
        sf = tbin / nr
        sf[sf==0]=1e-5
        div=np.divide(efa,sf[r])
        wdg=np.logical_and(div<1., r>1)
        ef[wdg]=0
        data.append(ef.flatten())
        wgs.append(wdg.flatten())
        keys.append(k)
        sys.stdout.write("\r {}/{} particles".format(len(data),nptcl))
        sys.stdout.flush()
    print()
    js.close()

    data=np.array(data)
    wgs=np.array(wgs)
    print(data.shape)
    # Wedge-weighted mean: sum over particles / count of unmasked contributions.
    #avg=np.mean(data, axis=0)
    avg=np.sum(data, axis=0)
    w=np.sum(1-np.array(wgs), axis=0)+1
    avg=avg/w
    dv=data-avg
    std=np.std(abs(dv))
    dv/=std
    # Real+imaginary parts side by side so PCA operates on real numbers.
    imgsnp=np.hstack([dv.real, dv.imag])
    print(dv.real.shape, imgsnp.shape)

    options.outpath = make_path("sptcls")
    print("Output will be written to {}".format(options.outpath))
    # (a legacy real-space implementation was removed here; it lived only in comments)

    print("Performing PCA...")
    ptclids=np.arange(nptcl,dtype=int)[:, None]
    nptcl=len(imgsnp)
    if options.clean:
        #### do pca twice to remove outliers
        pca=PCA(options.nbasis)
        pout=pca.fit_transform(imgsnp)
        dst=np.linalg.norm(pout-np.mean(pout, 0), axis=1)
        outlr=dst>np.mean(dst)+np.std(dst)*2
        np.savetxt("{}/pca_rmoutlier.txt".format(options.outpath), np.hstack([ptclids, pout]))
        print("Removing {} outliers...".format(np.sum(outlr)))
        imgsnp=imgsnp[outlr==False]
        ptclids=ptclids[outlr==False]

    pca=PCA(options.nbasis)
    pout=pca.fit_transform(imgsnp)
    np.savetxt("{}/pca_ptcls.txt".format(options.outpath), np.hstack([ptclids, pout]))

    # Back-transform each eigenvector into a real-space basis map.
    basisfile = "{}/pca_basis.hdf".format(options.outpath)
    l=len(data[0])
    for i,c in enumerate(pca.components_):
        eg=c[:l]+c[l:]*1j
        egmap=eg.reshape((sz,sz,sz))
        o=np.real(np.fft.ifftn(np.fft.ifftshift(egmap)))
        m=from_numpy(o.copy())
        m.write_image(basisfile,i)

    print("Classifying particles...")
    kmeans = KMeans(n_clusters=options.nclass).fit(pout)
    lb=kmeans.labels_
    if options.clean:
        # Re-expand labels to the full particle list; outliers become class -1.
        lbfull=np.zeros(nptcl, dtype=int)
        lbfull[outlr==False]=lb
        lbfull[outlr]=-1
        lb=lbfull

    print("Class: Particle count")
    for i in range(kmeans.n_clusters):
        # FIX: was format(lb, ...) which printed the whole label array instead
        # of the class index.
        print("{}: {}".format(i,np.sum(lb==i)))

    # Write one .lst per class (outlier class -1 lands in ptcls_cls00.lst).
    # NOTE(review): inlst.read(i) indexes by preprocessing order; with --dotest
    # shuffling or missing keys this may not match the original list order -- verify.
    inlst=LSXFile(inptcls, True)
    outlsts=[]
    for lbl in sorted(np.unique(lb)):
        outlst = LSXFile("{}/ptcls_cls{:02d}.lst".format(options.outpath, lbl+1))
        for i in range(nptcl):
            if lb[i]==lbl:
                l=inlst.read(i)
                outlst.write(-1,l[0], l[1],l[2])
        outlst.close()
    inlst.close()

    # Split the alignment parameters per class and average each class.
    js0=js_open_dict(parmsfile)
    dics=[{} for i in range(kmeans.n_clusters)]
    for i in range(nptcl):
        if lb[i]>=0:
            k=keys[i]
            dics[lb[i]][k]=js0[k]
    js0.close()
    for i,d in enumerate(dics):
        js=js_open_dict("{}/particle_parms_{:02d}.json".format(options.outpath, i+1))
        js.update(d)
        js.close()
        os.system("e2spt_average.py --path {} --iter {} --threads 10 --sym {} --skippostp --simthr 1".format(options.outpath, i+1, options.sym))

    E2end(logid)