def mem_calc_and_output_info(smearing_file, log_main, nxinit):
    """Log the estimated in-core memory footprint of sorting, per node.

    Reads the per-image smearing counts from ``smearing_file``, selects the
    entries listed in ``iter_id_init_file`` and writes the resulting size
    estimates (in GB) to ``log_main`` through its ``add`` method.

    smearing_file - text file with one smearing count per image
    log_main      - logger object exposing ``add(message)``
    nxinit        - image size (pixels) used for shifted images and volumes

    Returns None.

    NOTE(review): ``iter_id_init_file`` is neither a parameter nor assigned
    in this function; presumably it is a module-level name - confirm.
    """
    global Blockdata, Tracker
    from utilities import read_text_file, MPI_start_end
    import numpy as np

    # single CPU ; no info update
    smearing_list = np.array(read_text_file(smearing_file), dtype=np.int32)

    # The index file holds either one column or two; with two, the second
    # column is the one that is used.
    index_columns = read_text_file(iter_id_init_file, -1)
    chosen_column = index_columns[0] if len(index_columns) == 1 else index_columns[1]
    indx_list = np.sort(np.array(chosen_column, dtype=np.int32))

    selected_smear_sum = np.sum(smearing_list[indx_list])
    avg_smear = selected_smear_sum / indx_list.shape[0]

    n_groups = Blockdata["no_of_groups"]
    cdata_in_core = (Tracker["total_stack"] * Tracker["nxinit"] * Tracker["nxinit"] * 4.0) / 1.e9 / n_groups
    srdata_in_core = (nxinit * nxinit * selected_smear_sum * 4.) / 1.e9 / n_groups
    # Focus images only occupy memory when a focus mask is in use.
    fdata_in_core = cdata_in_core if Tracker["constants"]["focus3D"] else 0.0
    ctfdata = cdata_in_core
    refvol_size = (nxinit * nxinit * nxinit * 4.0 * 2) / 1.e9  # including the 3D mask

    memory_per_node = Tracker["constants"]["memory_per_node"]
    log_main.add(
        "Precalculated data (GB) in core per node (available memory per node: %6.2f):" % memory_per_node)
    log_main.add(
        "Images for comparison: %6.2f GB; shifted images: %6.2f GB; focus images: %6.2f GB; ctfs: %6.2f GB"
        % (cdata_in_core, srdata_in_core, fdata_in_core, ctfdata))
    tdata = cdata_in_core + srdata_in_core + ctfdata + refvol_size + fdata_in_core
    log_main.add(
        "The data to be in core for sorting occupies %7.3f percents of memory; average smearing is %5.1f"
        % (tdata / memory_per_node * 100., avg_smear))
    log_main.add("Consumed memory in sorting on individual nodes:")

    # Accumulate, for every node, the shifted-image memory of the processes
    # that belong to it.
    smearings_on_nodes = np.full(n_groups, 0.0, dtype=np.float32)
    procs_per_group = Blockdata["no_of_processes_per_group"]
    for iproc in range(Blockdata["nproc"]):
        image_start, image_end = MPI_start_end(smearing_list.shape[0], Blockdata["nproc"], iproc)
        smearings_on_nodes[iproc // procs_per_group] += \
            np.sum(smearing_list[image_start:image_end]) * (nxinit * nxinit * 4.) / 1.e9

    # Report three nodes per log line; a trailing partial line is flushed last.
    fixed_part = cdata_in_core + ctfdata + refvol_size
    msg = ""
    for icolor, node_smear in enumerate(smearings_on_nodes):
        tdata = fixed_part + node_smear + fdata_in_core
        msg += "Node %5d : Mem %7.2f GB " % (icolor, tdata) + "; "
        if icolor % 3 == 2:
            log_main.add(msg)
            msg = ""
    if n_groups % 3 != 0:
        log_main.add(msg)
    return
def helical2( ref_data ):
    """Prepare the reference in helical refinement, i.e., low-pass filter.

    Input: list ref_data
        2 - raw volume
    Output: filtered reference volume and a zero shift [0.0, 0.0, 0.0]

    Side effects: increments the global ``ref_ali2d_counter``, reads the last
    entries of the first two columns of 'symdoc.txt' as the current helical
    parameters and appends the refined pair to the same file.
    """
    from utilities import print_msg, read_text_file
    from filter import fit_tanh, filt_tanl
    from morphology import threshold
    from alignment import helios

    global ref_ali2d_counter
    ref_ali2d_counter += 1
    print_msg("helical2 #%6d\n"%(ref_ali2d_counter))

    # Remove the mean (computed without a mask) and threshold the volume.
    stat = Util.infomask(ref_data[2], None, True)
    volf = ref_data[2] - stat[0]
    volf = threshold(volf)

    fl = 0.25#0.17
    aa = 0.1
    msg = "Tangent filter: cut-off frequency = %10.3f fall-off = %10.3f\n"%(fl, aa)
    print_msg(msg)
    volf = filt_tanl(volf, fl, aa)

    dipr = read_text_file('symdoc.txt',-1)
    #here pixel size, fract, rmax and rmin will have to be read from external text file
    volf, dp, dphi = helios(volf, 2.175, dipr[0][-1], dipr[1][-1], 0.7, 30,3)
    print_msg("New delta z and delta phi : %s, %s\n\n"%(dp,dphi))

    # The context manager closes the file even if the write fails.
    with open('symdoc.txt','a') as fofo:
        fofo.write(' %12.4f %12.4f\n'%(dp,dphi))
    return volf,[0.0,0.0,0.0]
def main():
    """Count the words of 'alice.txt' and save the most frequent ones.

    The text is read, cleaned into a list of words and tallied; the 15 most
    frequent words are passed to ``write_text_file`` for 'top_n_words.txt',
    and whatever that call returns is printed.
    """
    # how many of the most frequent words to report
    top_count = 15

    # load the raw text and turn it into a list of cleaned-up words
    raw_text = read_text_file('alice.txt')
    words = scrub_text(raw_text)

    # number of occurrences of every word
    frequencies = tally_words(words)

    # keep only the top entries and write them out
    most_common = find_most_frequent(frequencies, top_count)
    outcome = write_text_file('top_n_words.txt', most_common, top_count)
    print(outcome)
def main():
    """Recompute and post-process ISAC 2D class averages (sxcompute_isac_avg.py).

    One of --adjust_to_analytic_model, --adjust_to_given_pw2, --B_enhance or
    --no_adjustment is expected on the command line. The function

      1. parses the options and stores them in the global ``Tracker``,
      2. creates the output (master) directory on the main node,
      3. combines the initial and ISAC 2D alignment parameters per member,
      4. recomputes every class average (optionally with local alignment,
         power-spectrum adjustment / B-factor enhancement and low-pass
         filtration), distributed over the MPI processes,
      5. gathers the averages, alignment parameters and resolution estimates
         on the main node and writes them to the master directory,
      6. runs sxchains.py to order the averages, finalizes MPI and exits.

    Takes no arguments and does not return (it calls ``exit()``).
    """
    global Tracker, Blockdata
    from optparse import OptionParser
    from global_def import SPARXVERSION
    from global_def import ERROR
    from EMAN2 import EMData
    from logger import Logger, BaseLogger_Files
    import sys, os, time
    # All helpers are imported up front so that each name is bound before its
    # first use (a function-local import that comes later leaves the name
    # unbound until that line is reached).
    from utilities import get_im, bcast_number_to_all, write_text_file, read_text_file, wrap_mpi_bcast, write_text_row
    from utilities import cmdexecute
    from filter import filt_tanl
    from time import sleep
    import user_functions
    import json

    myid = Blockdata["myid"]
    main_node = Blockdata["main_node"]
    nproc = Blockdata["nproc"]

    progname = os.path.basename(sys.argv[0])
    usage = progname + " --output_dir=output_dir --isac_dir=output_dir_of_isac "
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option("--adjust_to_analytic_model", action="store_true", default=False,
                      help="adjust power spectrum of 2-D averages to an analytic model ")
    parser.add_option("--adjust_to_given_pw2", action="store_true", default=False,
                      help="adjust power spectrum to 2-D averages to given 1D power spectrum")
    parser.add_option("--B_enhance", action="store_true", default=False,
                      help="using B-factor to enhance 2-D averages")
    parser.add_option("--no_adjustment", action="store_true", default=False,
                      help="No power spectrum adjustment")

    # The mode-specific options below can only be registered once the mode is
    # known, so the raw command line is scanned before it is parsed.
    cli_args = sys.argv[1:]
    options_list = []
    mode_given = {}
    for flag in ("--adjust_to_analytic_model", "--adjust_to_given_pw2", "--B_enhance", "--no_adjustment"):
        first_match = next((q for q in cli_args if q.startswith(flag)), None)
        mode_given[flag] = first_match is not None
        if first_match is not None:
            options_list.append(first_match)
    adjust_to_analytic_model = mode_given["--adjust_to_analytic_model"]
    adjust_to_given_pw2 = mode_given["--adjust_to_given_pw2"]
    B_enhance = mode_given["--B_enhance"]
    no_adjustment = mode_given["--no_adjustment"]

    if len(options_list) == 0:
        if myid == main_node:
            print(
                "specify one of the following options to start: 1. adjust_to_analytic_model; 2. adjust_to_given_pw2; 3. B_enhance; 4. no_adjustment"
            )
    if len(options_list) > 1:
        ERROR("The specified options are exclusive. Use only one of them to start",
              "sxcompute_isac_avg.py", 1, myid)

    # options in common
    parser.add_option("--isac_dir", type="string", default='',
                      help="ISAC run output directory, input directory for this command")
    parser.add_option("--output_dir", type="string", default='',
                      help="output directory where computed averages are saved")
    parser.add_option("--pixel_size", type="float", default=-1.0, help="pixel_size of raw images")
    parser.add_option(
        "--fl", type="float", default=-1.0,
        help="low pass filter, =-1, not applied; =1, using FH1 (initial resolution), =2 using FH2 (resolution after local alignment), or user provided value")
    parser.add_option("--stack", type="string", default="", help="data stack used in ISAC")
    parser.add_option("--radius", type="int", default=-1, help="radius")
    parser.add_option("--xr", type="float", default=-1.0, help="local alignment search range")
    parser.add_option("--ts", type="float", default=1.0, help="local alignment search step")
    parser.add_option("--fh", type="float", default=-1., help="local alignment high frequencies limit")
    parser.add_option("--maxit", type="int", default=5, help="local alignment iterations")
    parser.add_option("--navg", type="int", default=-1, help="number of aveages")
    parser.add_option("--skip_local_alignment", action="store_true", default=False,
                      help="skip local alignment")
    parser.add_option(
        "--noctf", action="store_true", default=False,
        help="no ctf correction, useful for negative stained data. always ctf for cryo data")
    if B_enhance:
        parser.add_option(
            "--B_start", type="float", default=10.0,
            help="start frequency (1./Angstrom) of power spectrum for B_factor estimation")
        parser.add_option(
            "--Bfactor", type="float", default=-1.0,
            help="User defined bactors (e.g. 45.0[A^2]). By default, the program automatically estimates B-factor. ")
    if adjust_to_given_pw2:
        parser.add_option("--modelpw", type="string", default='', help="1-D reference power spectrum")

    (options, args) = parser.parse_args(sys.argv[1:])

    if adjust_to_given_pw2:
        # The reference power spectrum can only be validated after parsing,
        # once options.modelpw exists.
        checking_flag = 0
        if myid == main_node:
            if not os.path.exists(options.modelpw):
                checking_flag = 1
        checking_flag = bcast_number_to_all(checking_flag, main_node, MPI_COMM_WORLD)
        if checking_flag == 1:
            ERROR("User provided power spectrum does not exist", "sxcompute_isac_avg.py", 1, myid)

    # Create and initialize Tracker dictionary with input options
    Tracker = {}
    Constants = {}
    Constants["isac_dir"] = options.isac_dir
    Constants["masterdir"] = options.output_dir
    Constants["pixel_size"] = options.pixel_size
    Constants["orgstack"] = options.stack
    Constants["radius"] = options.radius
    Constants["xrange"] = options.xr
    Constants["xstep"] = options.ts
    Constants["FH"] = options.fh
    Constants["maxit"] = options.maxit
    Constants["navg"] = options.navg
    Constants["low_pass_filter"] = options.fl
    if B_enhance:
        Constants["B_start"] = options.B_start
        Constants["Bfactor"] = options.Bfactor
    if adjust_to_given_pw2:
        Constants["modelpw"] = options.modelpw
    Tracker["constants"] = Constants

    # ---- helpers shared by both work-distribution schemes ----
    def _load_members(iavg):
        """Read the member images of average iavg and attach their 2D parameters."""
        images = [None] * len(list_dict[iavg])
        for im in xrange(len(images)):
            images[im] = get_im(Tracker["constants"]["orgstack"], list_dict[iavg][im])
            set_params2D(images[im], params_dict[iavg][im], xform="xform.align2d")
        return images

    def _compute_average(images):
        """Average the images with or without CTF correction (per --noctf)."""
        if options.noctf:
            return compute_average_noctf(images, Tracker["constants"]["radius"])
        return compute_average_ctf(images, Tracker["constants"]["radius"])

    def _refine_half(images, first, FH1):
        """Locally align every second image, starting at index first."""
        return within_group_refinement(
            [images[kik] for kik in xrange(first, len(images), 2)], maskfile=None, randomize=False,
            ir=1.0, ou=Tracker["constants"]["radius"], rs=1.0, xrng=[x_range], yrng=[y_range],
            step=[Tracker["constants"]["xstep"]], dst=0.0, maxit=Tracker["constants"]["maxit"],
            FH=max(Tracker["constants"]["FH"], FH1), FF=0.1)

    def _select_low_pass(FH1, FH2):
        """Translate --fl (1 -> FH1, 2 -> FH2, else the value) into a cut-off capped at 0.45."""
        requested = Tracker["constants"]["low_pass_filter"]
        if requested == 1.:
            low_pass_filter = FH1
        elif requested == 2.:
            low_pass_filter = FH2
            # without local alignment FH2 is not estimated, fall back to FH1
            if options.skip_local_alignment:
                low_pass_filter = FH1
        else:
            low_pass_filter = requested
        if low_pass_filter >= 0.45:
            low_pass_filter = 0.45
        return low_pass_filter

    ####-----------------------------------------------------------
    # Create Master directory
    line = strftime("%Y-%m-%d_%H:%M:%S", localtime()) + " =>"
    if Tracker["constants"]["masterdir"] == Tracker["constants"]["isac_dir"]:
        masterdir = os.path.join(Tracker["constants"]["isac_dir"], "sharpen")
    else:
        masterdir = Tracker["constants"]["masterdir"]

    if myid == main_node:
        print(line, "Postprocessing ISAC 2D averages starts")
        if not masterdir:
            masterdir = "sharpen_" + Tracker["constants"]["isac_dir"]
            os.mkdir(masterdir)
        else:
            if os.path.exists(masterdir):
                print("%s already exists" % masterdir)
            else:
                os.mkdir(masterdir)
        li = len(masterdir)
    else:
        li = 0
    # Share the final directory name (length first, then the characters).
    li = mpi_bcast(li, 1, MPI_INT, main_node, MPI_COMM_WORLD)[0]
    masterdir = mpi_bcast(masterdir, li, MPI_CHAR, main_node, MPI_COMM_WORLD)
    masterdir = "".join(masterdir)
    Tracker["constants"]["masterdir"] = masterdir
    log_main = Logger(BaseLogger_Files())
    log_main.prefix = Tracker["constants"]["masterdir"] + "/"

    # Wait until the directory created by the main node is visible here.
    while not os.path.exists(Tracker["constants"]["masterdir"]):
        print("Node ", myid, " waiting...", Tracker["constants"]["masterdir"])
        sleep(1)
    mpi_barrier(MPI_COMM_WORLD)

    if myid == main_node:
        init_dict = {}
        print(Tracker["constants"]["isac_dir"])
        Tracker["directory"] = os.path.join(Tracker["constants"]["isac_dir"], "2dalignment")
        core = read_text_row(os.path.join(Tracker["directory"], "initial2Dparams.txt"))
        for im in xrange(len(core)):
            init_dict[im] = core[im]
        del core
    else:
        init_dict = 0
    init_dict = wrap_mpi_bcast(init_dict, main_node, communicator=MPI_COMM_WORLD)

    if myid == main_node:
        image = get_im(Tracker["constants"]["orgstack"], 0)
        Tracker["constants"]["nnxo"] = image.get_xsize()
        try:
            ctf_params = image.get_attr("ctf")
            if Tracker["constants"]["pixel_size"] == -1.:
                Tracker["constants"]["pixel_size"] = ctf_params.apix
        except Exception:
            # Best effort: images without a ctf attribute keep the given value.
            print("pixel size value is not given.")
        Tracker["ini_shrink"] = float(
            get_im(os.path.join(Tracker["directory"], "aqfinal.hdf"), 0).get_xsize()
        ) / Tracker["constants"]["nnxo"]
    else:
        Tracker["ini_shrink"] = 0
    Tracker = wrap_mpi_bcast(Tracker, main_node, communicator=MPI_COMM_WORLD)

    isac_dir = Tracker["constants"]["isac_dir"]
    isac_averages = os.path.join(isac_dir, "class_averages.hdf")
    new_averages = os.path.join(Tracker["constants"]["masterdir"], "class_averages.hdf")

    x_range = max(Tracker["constants"]["xrange"], int(1. / Tracker["ini_shrink"]) + 1)
    y_range = x_range

    if myid == main_node:
        parameters = read_text_row(os.path.join(isac_dir, "all_parameters.txt"))
    else:
        parameters = 0
    parameters = wrap_mpi_bcast(parameters, main_node, communicator=MPI_COMM_WORLD)

    # prepare params_dict
    params_dict = {}
    list_dict = {}
    if Tracker["constants"]["navg"] < 0:
        navg = EMUtil.get_image_count(isac_averages)
    else:
        navg = min(Tracker["constants"]["navg"], EMUtil.get_image_count(isac_averages))

    global_dict = {}  # particle id -> [average index, position within the average]
    ptl_list = []
    memlist = []
    if myid == main_node:
        for iavg in xrange(navg):
            params_of_this_average = []
            image = get_im(isac_averages, iavg)
            members = image.get_attr("members")
            memlist.append(members)
            for im in xrange(len(members)):
                abs_id = members[im]
                global_dict[abs_id] = [iavg, im]
                # ISAC shifts are divided by ini_shrink before being combined
                # with the initial parameters.
                P = combine_params2(
                    init_dict[abs_id][0], init_dict[abs_id][1], init_dict[abs_id][2], init_dict[abs_id][3],
                    parameters[abs_id][0], parameters[abs_id][1] / Tracker["ini_shrink"],
                    parameters[abs_id][2] / Tracker["ini_shrink"], parameters[abs_id][3])
                if parameters[abs_id][3] == -1:
                    print("wrong one")
                params_of_this_average.append([P[0], P[1], P[2], P[3], 1.0])
                ptl_list.append(abs_id)
            params_dict[iavg] = params_of_this_average
            list_dict[iavg] = members
            write_text_row(
                params_of_this_average,
                os.path.join(Tracker["constants"]["masterdir"], "params_avg_%03d.txt" % iavg))
        ptl_list.sort()
        init_params = [None for im in xrange(len(ptl_list))]
        for im in xrange(len(ptl_list)):
            iavg_of_ptl, pos_in_avg = global_dict[ptl_list[im]][0], global_dict[ptl_list[im]][1]
            init_params[im] = [ptl_list[im]] + params_dict[iavg_of_ptl][pos_in_avg]
        write_text_row(
            init_params, os.path.join(Tracker["constants"]["masterdir"], "init_isac_params.txt"))
    else:
        params_dict = 0
        list_dict = 0
        memlist = 0
    params_dict = wrap_mpi_bcast(params_dict, main_node, communicator=MPI_COMM_WORLD)
    list_dict = wrap_mpi_bcast(list_dict, main_node, communicator=MPI_COMM_WORLD)
    memlist = wrap_mpi_bcast(memlist, main_node, communicator=MPI_COMM_WORLD)

    # Now computing!
    del init_dict
    tag_sharpen_avg = 1000
    ## always apply low pass filter to B_enhanced images to suppress noise in high frequencies
    enforced_to_H1 = False
    if options.B_enhance:
        if Tracker["constants"]["low_pass_filter"] == -1:
            print("User does not provide low pass filter")
            enforced_to_H1 = True

    # Local alignment parameters per average; needed by both schemes below.
    plist_dict = {}

    if navg < nproc:
        # Each CPU do one average
        FH_list = [None for im in xrange(navg)]
        for iavg in xrange(navg):
            if myid == iavg:
                mlist = _load_members(iavg)
                new_avg, frc, plist = _compute_average(mlist)
                FH1 = get_optimistic_res(frc)
                if not options.skip_local_alignment:
                    _refine_half(mlist, 0, FH1)
                    _refine_half(mlist, 1, FH1)
                    new_avg, frc, plist = _compute_average(mlist)
                    plist_dict[iavg] = plist
                    FH2 = get_optimistic_res(frc)
                else:
                    FH2 = 0.0
                FH_list[iavg] = [FH1, FH2]

                # pw adjustment, 1. analytic model 2. PDB model 3. B-factor enhancement
                if options.B_enhance:
                    new_avg, gb = apply_enhancement(
                        new_avg, Tracker["constants"]["B_start"],
                        Tracker["constants"]["pixel_size"], Tracker["constants"]["Bfactor"])
                    print("Process avg %d %f %f %f" % (iavg, gb, FH1, FH2))
                elif options.adjust_to_given_pw2:
                    roo = read_text_file(Tracker["constants"]["modelpw"], -1)
                    roo = roo[0]  # always put pw in the first column
                    new_avg = adjust_pw_to_model(new_avg, Tracker["constants"]["pixel_size"], roo)
                elif options.adjust_to_analytic_model:
                    new_avg = adjust_pw_to_model(new_avg, Tracker["constants"]["pixel_size"], None)
                elif options.no_adjustment:
                    pass
                print("Process avg %d %f %f" % (iavg, FH1, FH2))

                if Tracker["constants"]["low_pass_filter"] != -1.:
                    new_avg = filt_tanl(new_avg, _select_low_pass(FH1, FH2), 0.1)
                new_avg.set_attr("members", list_dict[iavg])
                new_avg.set_attr("n_objects", len(list_dict[iavg]))

        mpi_barrier(MPI_COMM_WORLD)
        for im in xrange(navg):  # avg
            if im == myid and myid != main_node:
                send_EMData(new_avg, main_node, tag_sharpen_avg)
            elif myid == main_node:
                if im != main_node:
                    new_avg_other_cpu = recv_EMData(im, tag_sharpen_avg)
                    new_avg_other_cpu.set_attr("members", memlist[im])
                    new_avg_other_cpu.write_image(new_averages, im)
                else:
                    new_avg.write_image(new_averages, im)

            if not options.skip_local_alignment:
                if im == myid:
                    write_text_row(
                        plist,
                        os.path.join(Tracker["constants"]["masterdir"],
                                     "ali2d_local_params_avg_%03d.txt" % im))
                if myid == im and myid != main_node:
                    wrap_mpi_send(plist_dict[im], main_node, MPI_COMM_WORLD)
                elif im != main_node and myid == main_node:
                    dummy = wrap_mpi_recv(im, MPI_COMM_WORLD)
                    plist_dict[im] = dummy
                if im == myid and im != main_node:
                    wrap_mpi_send(FH_list[im], main_node, MPI_COMM_WORLD)
                elif im != main_node and myid == main_node:
                    dummy = wrap_mpi_recv(im, MPI_COMM_WORLD)
                    FH_list[im] = dummy
            else:
                if im == myid and im != main_node:
                    wrap_mpi_send(FH_list, main_node, MPI_COMM_WORLD)
                elif im != main_node and myid == main_node:
                    dummy = wrap_mpi_recv(im, MPI_COMM_WORLD)
                    FH_list[im] = dummy[im]
            # keep all ranks in step while one average is being collected
            mpi_barrier(MPI_COMM_WORLD)
    else:
        # Each CPU processes a contiguous range of averages.
        FH_list = [[0, 0.0, 0.0] for im in xrange(navg)]
        image_start, image_end = MPI_start_end(navg, nproc, myid)
        if myid == main_node:
            cpu_dict = {}  # average index -> rank that computes it
            for iproc in xrange(nproc):
                local_image_start, local_image_end = MPI_start_end(navg, nproc, iproc)
                for im in xrange(local_image_start, local_image_end):
                    cpu_dict[im] = iproc
        else:
            cpu_dict = 0
        cpu_dict = wrap_mpi_bcast(cpu_dict, main_node, communicator=MPI_COMM_WORLD)

        slist = [None for im in xrange(navg)]
        data_list = [None for im in xrange(navg)]
        if myid == main_node:
            print("read data")
        for iavg in xrange(image_start, image_end):
            data_list[iavg] = _load_members(iavg)
        print("read data done %d" % myid)

        for iavg in xrange(image_start, image_end):
            mlist = data_list[iavg]
            new_avg, frc, plist = _compute_average(mlist)
            FH1 = get_optimistic_res(frc)
            if not options.skip_local_alignment:
                _refine_half(mlist, 0, FH1)
                _refine_half(mlist, 1, FH1)
                new_avg, frc, plist = _compute_average(mlist)
                plist_dict[iavg] = plist
                FH2 = get_optimistic_res(frc)
            else:
                FH2 = 0.0
            FH_list[iavg] = [iavg, FH1, FH2]

            if options.B_enhance:
                new_avg, gb = apply_enhancement(
                    new_avg, Tracker["constants"]["B_start"],
                    Tracker["constants"]["pixel_size"], Tracker["constants"]["Bfactor"])
                print("Process avg %d %f %f %f" % (iavg, gb, FH1, FH2))
            elif options.adjust_to_given_pw2:
                roo = read_text_file(Tracker["constants"]["modelpw"], -1)
                roo = roo[0]  # always on the first column
                new_avg = adjust_pw_to_model(new_avg, Tracker["constants"]["pixel_size"], roo)
                print("Process avg %d %f %f" % (iavg, FH1, FH2))
            elif adjust_to_analytic_model:
                new_avg = adjust_pw_to_model(new_avg, Tracker["constants"]["pixel_size"], None)
                print("Process avg %d %f %f" % (iavg, FH1, FH2))
            elif options.no_adjustment:
                pass

            # NOTE(review): this first filtration uses the raw --fl value
            # (including the selector values 1 and 2) and is followed by a
            # second filtration with the resolved cut-off; the one-average-
            # per-CPU branch filters only once. Looks like a leftover - confirm.
            if Tracker["constants"]["low_pass_filter"] != -1.:
                new_avg = filt_tanl(new_avg, Tracker["constants"]["low_pass_filter"], 0.1)

            if Tracker["constants"]["low_pass_filter"] != -1.:
                new_avg = filt_tanl(new_avg, _select_low_pass(FH1, FH2), 0.1)
            else:
                if enforced_to_H1:
                    new_avg = filt_tanl(new_avg, FH1, 0.1)
            if options.B_enhance:
                new_avg = fft(new_avg)

            new_avg.set_attr("members", list_dict[iavg])
            new_avg.set_attr("n_objects", len(list_dict[iavg]))
            slist[iavg] = new_avg

        ## send to main node to write
        mpi_barrier(MPI_COMM_WORLD)
        for im in xrange(navg):  # avg
            owner = cpu_dict[im]
            if owner == myid and myid != main_node:
                send_EMData(slist[im], main_node, tag_sharpen_avg)
            elif owner == myid and myid == main_node:
                slist[im].set_attr("members", memlist[im])
                slist[im].write_image(new_averages, im)
            elif owner != myid and myid == main_node:
                new_avg_other_cpu = recv_EMData(owner, tag_sharpen_avg)
                new_avg_other_cpu.set_attr("members", memlist[im])
                new_avg_other_cpu.write_image(new_averages, im)

            if not options.skip_local_alignment:
                if owner == myid:
                    write_text_row(
                        plist_dict[im],
                        os.path.join(Tracker["constants"]["masterdir"],
                                     "ali2d_local_params_avg_%03d.txt" % im))
                if owner == myid and owner != main_node:
                    wrap_mpi_send(plist_dict[im], main_node, MPI_COMM_WORLD)
                    wrap_mpi_send(FH_list, main_node, MPI_COMM_WORLD)
                elif owner != main_node and myid == main_node:
                    dummy = wrap_mpi_recv(owner, MPI_COMM_WORLD)
                    plist_dict[im] = dummy
                    dummy = wrap_mpi_recv(owner, MPI_COMM_WORLD)
                    FH_list[im] = dummy[im]
            else:
                if owner == myid and owner != main_node:
                    wrap_mpi_send(FH_list, main_node, MPI_COMM_WORLD)
                elif owner != main_node and myid == main_node:
                    dummy = wrap_mpi_recv(owner, MPI_COMM_WORLD)
                    FH_list[im] = dummy[im]
            # keep all ranks in step while one average is being collected
            mpi_barrier(MPI_COMM_WORLD)
    mpi_barrier(MPI_COMM_WORLD)

    if not options.skip_local_alignment:
        if myid == main_node:
            ali3d_local_params = [None for im in xrange(len(ptl_list))]
            for im in xrange(len(ptl_list)):
                iavg_of_ptl, pos_in_avg = global_dict[ptl_list[im]][0], global_dict[ptl_list[im]][1]
                ali3d_local_params[im] = [ptl_list[im]] + plist_dict[iavg_of_ptl][pos_in_avg]
            write_text_row(
                ali3d_local_params,
                os.path.join(Tracker["constants"]["masterdir"], "ali2d_local_params.txt"))
            write_text_row(
                FH_list, os.path.join(Tracker["constants"]["masterdir"], "FH_list.txt"))
    else:
        if myid == main_node:
            write_text_row(
                FH_list, os.path.join(Tracker["constants"]["masterdir"], "FH_list.txt"))
    mpi_barrier(MPI_COMM_WORLD)

    # Order the new averages with sxchains.py and remove its scratch file.
    target_xr = 3
    target_yr = 3
    if myid == 0:
        cmd = "{} {} {} {} {} {} {} {} {} {}".format(
            "sxchains.py",
            os.path.join(Tracker["constants"]["masterdir"], "class_averages.hdf"),
            os.path.join(Tracker["constants"]["masterdir"], "junk.hdf"),
            os.path.join(Tracker["constants"]["masterdir"], "ordered_class_averages.hdf"),
            "--circular", "--radius=%d" % Tracker["constants"]["radius"],
            "--xr=%d" % (target_xr + 1), "--yr=%d" % (target_yr + 1), "--align", ">/dev/null")
        cmdexecute(cmd)
        cmd = "{} {}".format(
            "rm -rf", os.path.join(Tracker["constants"]["masterdir"], "junk.hdf"))
        cmdexecute(cmd)

    from mpi import mpi_finalize
    mpi_finalize()
    exit()
def do_volume_mrk02(ref_data):
    """
    Reconstruct (if needed) and post-process the reference volume.

    ref_data is a list:
        0 - data: projections (scattered between cpus) or the volume.
            If volume, just do the volume processing
        1 - Tracker: options - the same for all cpus
        2 - iteration number (not used here)
        3 - MPI communicator; None selects MPI_COMM_WORLD

    On the process with rank 0 the volume is normalized under the 3D mask,
    optionally power-spectrum adjusted, optionally modified by the "sausage"
    filter, and low-pass or locally filtered.

    return - volume the same for all cpus
    """
    from EMAN2 import Util
    from mpi import mpi_comm_rank, mpi_comm_size, MPI_COMM_WORLD
    from filter import filt_table
    from reconstruction import recons3d_4nn_MPI, recons3d_4nn_ctf_MPI
    from utilities import bcast_EMData_to_all, bcast_number_to_all, model_blank
    from fundamentals import rops_table, fftip, fft
    from math import exp

    def _sausage_gain(i, y, pixel_size):
        """Gain of the "sausage" filter at frequency index i for image size y."""
        freq = i/y/pixel_size
        return (1.0+1.0*exp(-((freq-0.10)/0.025)**2)+1.0*exp(-((freq-0.215)/0.025)**2))

    # Retrieve the function specific input arguments from ref_data
    data = ref_data[0]
    Tracker = ref_data[1]
    mpi_comm = ref_data[3]

    if mpi_comm is None:
        mpi_comm = MPI_COMM_WORLD
    myid = mpi_comm_rank(mpi_comm)
    nproc = mpi_comm_size(mpi_comm)

    # A missing "local_filter" entry means that no local filtration is done.
    try:
        local_filter = Tracker["local_filter"]
    except KeyError:
        local_filter = False

    #=========================================================================
    # volume reconstruction
    if isinstance(data, list):
        if Tracker["constants"]["CTF"]:
            vol = recons3d_4nn_ctf_MPI(myid, data, Tracker["constants"]["snr"],
                                       symmetry=Tracker["constants"]["sym"], npad=Tracker["constants"]["npad"],
                                       mpi_comm=mpi_comm, smearstep=Tracker["smearstep"])
        else:
            vol = recons3d_4nn_MPI(myid, data,
                                   symmetry=Tracker["constants"]["sym"], npad=Tracker["constants"]["npad"],
                                   mpi_comm=mpi_comm)
    else:
        vol = data

    if myid == 0:
        from morphology import threshold
        from filter import filt_tanl, filt_btwl
        from utilities import model_circle, get_im
        nx = vol.get_xsize()
        if Tracker["constants"]["mask3D"] is None:
            mask3D = model_circle(
                int(Tracker["constants"]["radius"]*float(nx)/float(Tracker["constants"]["nnxo"])+0.5),
                nx, nx, nx)
        elif isinstance(Tracker["constants"]["mask3D"], str) and Tracker["constants"]["mask3D"] == "auto":
            from utilities import adaptive_mask
            mask3D = adaptive_mask(vol)
        else:
            if isinstance(Tracker["constants"]["mask3D"], str):
                mask3D = get_im(Tracker["constants"]["mask3D"])
            else:
                mask3D = (Tracker["constants"]["mask3D"]).copy()
            nxm = mask3D.get_xsize()
            if nx != nxm:
                # rescale the user mask to the size of the volume
                from fundamentals import rot_shift3D
                mask3D = Util.window(rot_shift3D(mask3D, scale=float(nx)/float(nxm)), nx, nx, nx)
                nxm = mask3D.get_xsize()
                assert(nx == nxm)

        stat = Util.infomask(vol, mask3D, False)
        vol -= stat[0]
        Util.mul_scalar(vol, 1.0/stat[1])
        vol = threshold(vol)
        Util.mul_img(vol, mask3D)

        if Tracker["PWadjustment"]:
            from utilities import read_text_file
            rt = read_text_file(Tracker["PWadjustment"])
            fftip(vol)
            ro = rops_table(vol)
            # Here unless I am mistaken it is enough to take the beginning of the reference pw.
            for i in xrange(1, len(ro)):
                ro[i] = (rt[i]/ro[i])**Tracker["upscale"]
            if Tracker["constants"]["sausage"]:
                y = float(vol.get_ysize())
                for i in xrange(len(ro)):
                    ro[i] *= _sausage_gain(i, y, Tracker["constants"]["pixel_size"])

            if local_filter:
                # skip low-pass filtration
                vol = fft(filt_table(vol, ro))
            else:
                if isinstance(Tracker["lowpass"], list):
                    vol = fft(filt_table(filt_table(vol, Tracker["lowpass"]), ro))
                else:
                    vol = fft(filt_table(filt_tanl(vol, Tracker["lowpass"], Tracker["falloff"]), ro))
            del ro
        else:
            if Tracker["constants"]["sausage"]:
                ny = vol.get_ysize()
                y = float(ny)
                ro = [0.0]*(ny//2+2)
                for i in xrange(len(ro)):
                    ro[i] = _sausage_gain(i, y, Tracker["constants"]["pixel_size"])
                fftip(vol)
                # Every other filt_table call in this function consumes the
                # return value, so the result is kept here as well; it was
                # discarded before, which left the sausage filter unapplied.
                vol = filt_table(vol, ro)
                del ro
            if not local_filter:
                if isinstance(Tracker["lowpass"], list):
                    vol = filt_table(vol, Tracker["lowpass"])
                else:
                    vol = filt_tanl(vol, Tracker["lowpass"], Tracker["falloff"])
            if Tracker["constants"]["sausage"]:
                vol = fft(vol)

    if local_filter:
        from morphology import binarize
        if myid == 0:
            nx = mask3D.get_xsize()
        else:
            nx = 0
        nx = bcast_number_to_all(nx, source_node=0)
        # only main processor needs the two input volumes
        if myid == 0:
            mask = binarize(mask3D, 0.5)
            locres = get_im(Tracker["local_filter"])
            lx = locres.get_xsize()
            if lx != nx:
                if lx < nx:
                    # bring mask and volume down to the size of the local resolution map
                    from fundamentals import fdecimate, rot_shift3D
                    mask = Util.window(rot_shift3D(mask, scale=float(lx)/float(nx)), lx, lx, lx)
                    vol = fdecimate(vol, lx, lx, lx)
                else:
                    ERROR("local filter cannot be larger than input volume", "user function", 1)
            stat = Util.infomask(vol, mask, False)
            vol -= stat[0]
            Util.mul_scalar(vol, 1.0/stat[1])
        else:
            lx = 0
            locres = model_blank(1, 1, 1)
            vol = model_blank(1, 1, 1)
        lx = bcast_number_to_all(lx, source_node=0)
        if myid != 0:
            mask = model_blank(lx, lx, lx)
        bcast_EMData_to_all(mask, myid, 0, comm=mpi_comm)
        from filter import filterlocal
        vol = filterlocal(locres, vol, mask, Tracker["falloff"], myid, 0, nproc)

        if myid == 0:
            if lx < nx:
                from fundamentals import fpol
                vol = fpol(vol, nx, nx, nx)
            vol = threshold(vol)
            vol = filt_btwl(vol, 0.38, 0.5)  # This will have to be corrected.
            Util.mul_img(vol, mask3D)
            del mask3D
        else:
            vol = model_blank(nx, nx, nx)
    else:
        if myid == 0:
            stat = Util.infomask(vol, mask3D, False)
            vol -= stat[0]
            Util.mul_scalar(vol, 1.0/stat[1])
            vol = threshold(vol)
            vol = filt_btwl(vol, 0.38, 0.5)  # This will have to be corrected.
            Util.mul_img(vol, mask3D)
            del mask3D

    # broadcast volume
    bcast_EMData_to_all(vol, myid, 0, comm=mpi_comm)
    #=========================================================================
    return vol
def dovolume( ref_data ):
    """Prepare a filtered, masked and optionally centered 3-D reference.

    ref_data is a list:
        0 - mask
        1 - center flag (1 requests centering by the phase center of gravity)
        2 - raw average volume
        3 - fsc result (not used by this function)

    Returns (vol, cs): the processed volume and the shift [x, y, z] that was
    removed from it ([0.0, 0.0, 0.0] when no centering was requested).

    Side effects: increments the module-level ref_ali2d_counter and reports
    progress through print_msg.  The optional files "flaa.txt" (tangent filter
    cut-off and fall-off) and "pwreference.txt" (reference 1-D power spectrum)
    are read from the current directory when they can be used.
    """
    from utilities    import print_msg, read_text_row, read_text_file
    from filter       import fit_tanh, filt_tanl, filt_table, filt_btwl
    from fundamentals import fshift, rops_table, fftip, fft
    from morphology   import threshold

    global ref_ali2d_counter
    ref_ali2d_counter += 1

    mask = ref_data[0]
    raw_average = ref_data[2]

    goal = raw_average.cmp("dot", raw_average, {"negative":0, "mask":mask})
    print_msg("do_volume user function Step = %5d GOAL = %10.3e\n"%(ref_ali2d_counter, goal))

    # Normalize under the mask before filtering.
    stat = Util.infomask(raw_average, mask, False)
    vol = raw_average - stat[0]
    Util.mul_scalar(vol, 1.0/stat[1])
    vol = threshold(vol)

    # Filter parameters come from flaa.txt when it is usable; otherwise fall
    # back to fixed defaults (deliberate best-effort).
    try:
        aa = read_text_row("flaa.txt")[0]
        fl = aa[0]
        aa = aa[1]
    except Exception:
        fl = 0.4
        aa = 0.2
    print_msg("Tangent filter: cut-off frequency = %10.3f fall-off = %10.3f\n"%(fl, aa))

    fftip(vol)
    # Adjust the power spectrum to the reference when pwreference.txt is
    # usable; otherwise apply the tangent low-pass filter alone.
    try:
        rt = read_text_file( "pwreference.txt" )
        ro = rops_table(vol)
        # Here unless I am mistaken it is enough to take the beginning of the reference pw.
        for i in xrange(1, len(ro)):
            ro[i] = (rt[i]/ro[i])**0.5
        vol = fft( filt_table( filt_tanl(vol, fl, aa), ro) )
        print_msg("Power spectrum adjusted\n")
    except Exception:
        vol = fft( filt_tanl(vol, fl, aa) )

    stat = Util.infomask(vol, mask, False)
    vol -= stat[0]
    Util.mul_scalar(vol, 1.0/stat[1])
    vol = threshold(vol)
    vol = filt_btwl(vol, 0.38, 0.5)
    Util.mul_img(vol, mask)

    if ref_data[1] == 1:
        # The original referenced an undefined name (volf) here, so requesting
        # centering raised NameError; center the volume that is returned.
        cs = vol.phase_cog()
        print_msg("Center x = %10.3f Center y = %10.3f Center z = %10.3f\n"%(cs[0], cs[1], cs[2]))
        vol = fshift(vol, -cs[0], -cs[1], -cs[2])
    else:
        cs = [0.0]*3
    return vol, cs
def main():
    """Command-line entry point of the MPI 3-D sorting program.

    Parses the command line (stack, outdir and optional 3-D mask), initializes
    MPI, copies the input stack and its parameters into the master directory,
    estimates the resolution from the two chunk files found in --chunkdir and
    then runs generations of independent equal-size K-means runs, two-way
    comparison and K-means refinement until fewer than two groups can be
    formed from the unaccounted particles.  Filtered class volumes are written
    to each generation directory.

    When the number of positional arguments is wrong the usage text is printed
    and the function returns; otherwise it ends with mpi_finalize() and exit().
    """
    from logger import Logger, BaseLogger_Files

    # Drop the MPICH process-group arguments (each takes one value).
    arglist = []
    i = 0
    while i < len(sys.argv):
        if sys.argv[i] == '-p4pg' or sys.argv[i] == '-p4wd':
            i = i+2
        else:
            arglist.append( sys.argv[i] )
            i = i+1

    progname = os.path.basename(arglist[0])
    # Fixed: a space was missing before --an, and the low-pass option was
    # advertised under a name (--low_pass_frequency) the parser does not accept.
    usage = progname + " stack outdir <mask> --focus=3Dmask --radius=outer_radius --delta=angular_step " +\
        "--an=angular_neighborhood --maxit=max_iter --CTF --sym=c1 --function=user_function --independent=independent_runs --number_of_images_per_group=number_of_images_per_group --low_pass_filter=.25 --seed=random_seed"
    parser = OptionParser(usage,version=SPARXVERSION)
    parser.add_option("--focus",    type="string", default='',     help="bineary 3D mask for focused clustering ")
    parser.add_option("--ir",       type="int",    default=1,      help="inner radius for rotational correlation > 0 (set to 1)")
    parser.add_option("--radius",   type="int",    default=-1,     help="particle radius in pixel for rotational correlation <nx-1 (set to the radius of the particle)")
    parser.add_option("--maxit",    type="int",    default=25,     help="maximum number of iteration")
    parser.add_option("--rs",       type="int",    default=1,      help="step between rings in rotational correlation >0 (set to 1)" )
    parser.add_option("--xr",       type="string", default='1',    help="range for translation search in x direction, search is +/-xr ")
    parser.add_option("--yr",       type="string", default='-1',   help="range for translation search in y direction, search is +/-yr (default = same as xr)")
    parser.add_option("--ts",       type="string", default='0.25', help="step size of the translation search in both directions direction, search is -xr, -xr+ts, 0, xr-ts, xr ")
    parser.add_option("--delta",    type="string", default='2',    help="angular step of reference projections")
    parser.add_option("--an",       type="string", default='-1',   help="angular neighborhood for local searches")
    parser.add_option("--center",   type="int",    default=0,      help="0 - if you do not want the volume to be centered, 1 - center the volume using cog (default=0)")
    parser.add_option("--nassign",  type="int",    default=1,      help="number of reassignment iterations performed for each angular step (set to 3) ")
    parser.add_option("--nrefine",  type="int",    default=0,      help="number of alignment iterations performed for each angular step (set to 0)")
    parser.add_option("--CTF",      action="store_true", default=False, help="do CTF correction during clustring")
    parser.add_option("--stoprnct", type="float",  default=3.0,    help="Minimum percentage of assignment change to stop the program")
    parser.add_option("--sym",      type="string", default='c1',   help="symmetry of the structure ")
    parser.add_option("--function", type="string", default='do_volume_mrk05', help="name of the reference preparation function")
    parser.add_option("--independent", type="int", default=3,      help="number of independent run")
    parser.add_option("--number_of_images_per_group", type="int", default=1000, help="number of groups")
    parser.add_option("--low_pass_filter", type="float", default=-1.0, help="absolute frequency of low-pass filter for 3d sorting on the original image size" )
    parser.add_option("--nxinit",   type="int",    default=64,     help="initial image size for sorting" )
    parser.add_option("--unaccounted", action="store_true", default=False, help="reconstruct the unaccounted images")
    parser.add_option("--seed",     type="int",    default=-1,     help="random seed for create initial random assignment for EQ Kmeans")
    parser.add_option("--smallest_group", type="int", default=500, help="minimum members for identified group")
    parser.add_option("--sausage",  action="store_true", default=False, help="way of filter volume")
    parser.add_option("--chunkdir", type="string", default='',     help="chunkdir for computing margin of error")
    parser.add_option("--PWadjustment", type="string", default='', help="1-D power spectrum of PDB file used for EM volume power spectrum correction")
    parser.add_option("--protein_shape", type="string", default='g', help="protein shape. It defines protein preferred orientation angles. Currently it has g and f two types ")
    parser.add_option("--upscale",  type="float",  default=0.5,    help=" scaling parameter to adjust the power spectrum of EM volumes")
    parser.add_option("--wn",       type="int",    default=0,      help="optimal window size for data processing")
    parser.add_option("--interpolation", type="string", default="4nn", help="3-d reconstruction interpolation method, two options trl and 4nn")
    (options, args) = parser.parse_args(arglist[1:])

    # Both stack and outdir are read unconditionally below, so at least two
    # positional arguments are required (the original accepted one and then
    # failed with IndexError on args[1]).
    if len(args) < 2 or len(args) > 4:
        print("usage: " + usage)
        print("Please run '" + progname + " -h' for detailed options")
        return

    if len(args) > 2:
        mask_file = args[2]
    else:
        mask_file = None
    orgstack  = args[0]
    masterdir = args[1]
    global_def.BATCH = True

    #---initialize MPI related variables
    # mpi_finalize is imported here and not at the end of the function: a
    # later function-scope import makes the name local to main(), so the
    # early-exit call below would raise UnboundLocalError.
    from mpi import (mpi_init, mpi_comm_size, MPI_COMM_WORLD, mpi_comm_rank, mpi_barrier,
                     mpi_bcast, mpi_finalize, MPI_INT, MPI_CHAR)
    sys.argv  = mpi_init(len(sys.argv),sys.argv)
    nproc     = mpi_comm_size(MPI_COMM_WORLD)
    myid      = mpi_comm_rank(MPI_COMM_WORLD)
    main_node = 0

    # import some utilities
    from utilities import get_im,bcast_number_to_all,cmdexecute,write_text_file,read_text_file,wrap_mpi_bcast, get_params_proj, write_text_row
    from applications import recons3d_n_MPI, mref_ali3d_MPI, Kmref_ali3d_MPI
    from statistics import k_means_match_clusters_asg_new,k_means_stab_bbenum
    from applications import mref_ali3d_EQ_Kmeans, ali3d_mref_Kmeans_MPI

    # Create the main log file (main node only)
    if myid == main_node:
        log_main = Logger(BaseLogger_Files())
        log_main.prefix = masterdir+"/"
    else:
        log_main = None

    #--- fill input parameters into dictionary named after Constants
    Constants = {}
    Constants["stack"]                      = args[0]
    Constants["masterdir"]                  = masterdir
    Constants["mask3D"]                     = mask_file
    Constants["focus3Dmask"]                = options.focus
    Constants["indep_runs"]                 = options.independent
    Constants["stoprnct"]                   = options.stoprnct
    Constants["number_of_images_per_group"] = options.number_of_images_per_group
    Constants["CTF"]                        = options.CTF
    Constants["maxit"]                      = options.maxit
    Constants["ir"]                         = options.ir
    Constants["radius"]                     = options.radius
    Constants["nassign"]                    = options.nassign
    Constants["rs"]                         = options.rs
    Constants["xr"]                         = options.xr
    Constants["yr"]                         = options.yr
    Constants["ts"]                         = options.ts
    Constants["delta"]                      = options.delta
    Constants["an"]                         = options.an
    Constants["sym"]                        = options.sym
    Constants["center"]                     = options.center
    Constants["nrefine"]                    = options.nrefine
    Constants["user_func"]                  = options.function
    Constants["low_pass_filter"]            = options.low_pass_filter # enforced low_pass_filter
    Constants["main_log_prefix"]            = args[1]
    Constants["myid"]                       = myid
    Constants["main_node"]                  = main_node
    Constants["nproc"]                      = nproc
    Constants["log_main"]                   = log_main
    Constants["nxinit"]                     = options.nxinit
    Constants["unaccounted"]                = options.unaccounted
    Constants["seed"]                       = options.seed
    Constants["smallest_group"]             = options.smallest_group
    Constants["sausage"]                    = options.sausage
    Constants["chunkdir"]                   = options.chunkdir
    Constants["PWadjustment"]               = options.PWadjustment
    Constants["upscale"]                    = options.upscale
    Constants["wn"]                         = options.wn
    Constants["3d-interpolation"]           = options.interpolation
    Constants["protein_shape"]              = options.protein_shape

    # Create and initialize Tracker dictionary with input options
    Tracker = {}
    Tracker["constants"]   = Constants
    Tracker["maxit"]       = Tracker["constants"]["maxit"]
    Tracker["radius"]      = Tracker["constants"]["radius"]
    Tracker["upscale"]     = Tracker["constants"]["upscale"]
    Tracker["applyctf"]    = False  # Should the data be premultiplied by the CTF. Set to False for local continuous.
    Tracker["nxinit"]      = Tracker["constants"]["nxinit"]
    Tracker["icurrentres"] = -1
    Tracker["fuse_freq"]   = 50     # Now in A, convert to absolute before using
    Tracker["orgstack"]    = orgstack

    # import from utilities
    from utilities import sample_down_1D_curve,get_initial_ID,remove_small_groups,print_upper_triangular_matrix,print_a_line_with_timestamp
    from utilities import print_dict,get_resolution_mrk01,partition_to_groups,partition_independent_runs,get_outliers
    from utilities import merge_groups, save_alist, margin_of_error, get_margin_of_error, do_two_way_comparison, select_two_runs, get_ali3d_params
    from utilities import counting_projections, unload_dict, load_dict, get_stat_proj, create_random_list, get_number_of_groups, recons_mref
    from utilities import apply_low_pass_filter, get_groups_from_partition, get_number_of_groups, get_complementary_elements_total, update_full_dict
    from utilities import count_chunk_members, set_filter_parameters_from_adjusted_fsc, adjust_fsc_down, get_two_chunks_from_stack
    from utilities import get_shrink_data_huang

    # Get the pixel size; if none, set to 1.0, and the original image size
    if myid == main_node:
        line = strftime("%Y-%m-%d_%H:%M:%S", localtime()) + " =>"
        print(line+"Initialization of 3-D sorting")
        a = get_im(orgstack)
        nnxo = a.get_xsize()
        if Tracker["nxinit"] > nnxo:
            # Fixed: the placeholder was written "$d", which makes the %
            # operator raise TypeError instead of reporting the problem.
            ERROR("Image size less than minimum permitted %d"%Tracker["nxinit"],"sxsort3d.py",1)
            nnxo = -1
        elif Tracker["constants"]["CTF"]:
            i = a.get_attr('ctf')
            pixel_size = i.apix
            fq = pixel_size/Tracker["fuse_freq"]
        else:
            pixel_size = 1.0
            # No pixel size, fusing computed as 5 Fourier pixels
            fq = 5.0/nnxo
        del a
    else:
        nnxo = 0
        fq = 0.0
        pixel_size = 1.0
    nnxo = bcast_number_to_all(nnxo, source_node = main_node)
    if nnxo < 0:
        mpi_finalize()
        exit()
    pixel_size = bcast_number_to_all(pixel_size, source_node = main_node)
    fq         = bcast_number_to_all(fq, source_node = main_node)
    if Tracker["constants"]["wn"] == 0:
        Tracker["constants"]["nnxo"] = nnxo
    else:
        Tracker["constants"]["nnxo"] = Tracker["constants"]["wn"]
        nnxo = Tracker["constants"]["nnxo"]
    Tracker["constants"]["pixel_size"] = pixel_size
    Tracker["fuse_freq"] = fq
    del fq, nnxo, pixel_size

    if Tracker["constants"]["radius"] < 1:
        Tracker["constants"]["radius"] = Tracker["constants"]["nnxo"]//2-2
    elif (2*Tracker["constants"]["radius"] +2) > Tracker["constants"]["nnxo"]:
        ERROR("Particle radius set too large!","sxsort3d.py",1,myid)

    # Master directory: created on the main node, its name broadcast to all
    if myid == main_node:
        if masterdir == "":
            timestring = strftime("_%d_%b_%Y_%H_%M_%S", localtime())
            masterdir = "master_sort3d"+timestring
        li = len(masterdir)
        cmd = "{} {}".format("mkdir", masterdir)
        os.system(cmd)
    else:
        li = 0
    li = mpi_bcast(li,1,MPI_INT,main_node,MPI_COMM_WORLD)[0]
    if li > 0:
        masterdir = mpi_bcast(masterdir,li,MPI_CHAR,main_node,MPI_COMM_WORLD)
        masterdir = "".join(masterdir)
    if myid == main_node:
        print_dict(Tracker["constants"],"Permanent settings of 3-D sorting program")

    ######### create a vstack from input stack to the local stack in masterdir
    # stack name set to default
    Tracker["constants"]["stack"]      = "bdb:"+masterdir+"/rdata"
    Tracker["constants"]["ali3d"]      = os.path.join(masterdir, "ali3d_init.txt")
    Tracker["constants"]["ctf_params"] = os.path.join(masterdir, "ctf_params.txt")
    Tracker["constants"]["partstack"]  = Tracker["constants"]["ali3d"] # also serves for refinement
    if myid == main_node:
        total_stack = EMUtil.get_image_count(Tracker["orgstack"])
    else:
        total_stack = 0
    total_stack = bcast_number_to_all(total_stack, source_node = main_node)
    mpi_barrier(MPI_COMM_WORLD)

    # Wait until the master directory is visible from this node.
    from time import sleep
    while not os.path.exists(masterdir):
        print("Node  %s  waiting..." % myid)
        sleep(5)
    mpi_barrier(MPI_COMM_WORLD)
    if myid == main_node:
        log_main.add("Sphire sort3d ")
        log_main.add("the sort3d master directory is "+masterdir)

    # Initial data analysis and handle two chunk files
    import user_functions
    user_func = user_functions.factory[Tracker["constants"]["user_func"]]

    def reconstruct(image_size, list_file):
        # Reconstruct one CTF-corrected volume from the particles listed in list_file.
        data, old_shifts = get_shrink_data_huang(Tracker, image_size, list_file,
                                Tracker["constants"]["partstack"], myid, main_node, nproc, preshift = True)
        return recons3d_4nn_ctf_MPI(myid=myid, prjlist=data, symmetry=Tracker["constants"]["sym"], finfo=None)

    def select_pw_adjustment(nx_of_image):
        # Point Tracker["PWadjustment"] at the power-spectrum table matching the volume size.
        if Tracker["constants"]["PWadjustment"]:
            Tracker["PWadjustment"] = Tracker["PW_dict"][nx_of_image]
        else:
            Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"] # no PW adjustment

    def apply_user_function(volume):
        # Run the user-selected reference preparation function on one volume.
        refdata = [volume, Tracker, Tracker["constants"]["myid"], Tracker["constants"]["nproc"]]
        return user_func(refdata)

    #### make chunkdir dictionary for computing margin of error
    chunk_dict = {}
    if myid == main_node:
        chunk_one = read_text_file(os.path.join(Tracker["constants"]["chunkdir"],"chunk0.txt"))
        chunk_two = read_text_file(os.path.join(Tracker["constants"]["chunkdir"],"chunk1.txt"))
    else:
        chunk_one = 0
        chunk_two = 0
    chunk_one = wrap_mpi_bcast(chunk_one, main_node)
    chunk_two = wrap_mpi_bcast(chunk_two, main_node)
    mpi_barrier(MPI_COMM_WORLD)

    ######################## Read/write bdb: data on main node ############################
    if myid == main_node:
        if orgstack[:4] == "bdb:":
            cmd = "{} {} {}".format("e2bdb.py", orgstack,"--makevstack="+Tracker["constants"]["stack"])
        else:
            cmd = "{} {} {}".format("sxcpy.py", orgstack, Tracker["constants"]["stack"])
        cmdexecute(cmd)
        cmd = "{} {} {}".format("sxheader.py --params=xform.projection", "--export="+Tracker["constants"]["ali3d"],orgstack)
        cmdexecute(cmd)
        cmd = "{} {} {}".format("sxheader.py --params=ctf", "--export="+Tracker["constants"]["ctf_params"],orgstack)
        cmdexecute(cmd)
    mpi_barrier(MPI_COMM_WORLD)

    Tracker["total_stack"]              = total_stack
    Tracker["constants"]["total_stack"] = total_stack
    Tracker["shrinkage"]                = float(Tracker["nxinit"])/Tracker["constants"]["nnxo"]
    Tracker["radius"]                   = Tracker["constants"]["radius"]*Tracker["shrinkage"]
    if Tracker["constants"]["mask3D"]:
        Tracker["mask3D"] = os.path.join(masterdir,"smask.hdf")
    else:
        Tracker["mask3D"] = None
    if Tracker["constants"]["focus3Dmask"]:
        Tracker["focus3D"] = os.path.join(masterdir,"sfocus.hdf")
    else:
        Tracker["focus3D"] = None
    if myid == main_node:
        if Tracker["constants"]["mask3D"]:
            mask_3D = get_shrink_3dmask(Tracker["nxinit"],Tracker["constants"]["mask3D"])
            mask_3D.write_image(Tracker["mask3D"])
        if Tracker["constants"]["focus3Dmask"]:
            mask_3D = get_shrink_3dmask(Tracker["nxinit"],Tracker["constants"]["focus3Dmask"])
            st = Util.infomask(mask_3D, None, True)
            if st[0] == 0.0:
                # Fixed argument order: message first, then the reporting
                # program, as in the other ERROR calls of this file.
                ERROR("incorrect focused mask, after binarize all values zero","sxsort3d.py",1)
            mask_3D.write_image(Tracker["focus3D"])
            del mask_3D
    if Tracker["constants"]["PWadjustment"] != '':
        PW_dict = {}
        nxinit_pwsp = sample_down_1D_curve(Tracker["constants"]["nxinit"],Tracker["constants"]["nnxo"],Tracker["constants"]["PWadjustment"])
        Tracker["nxinit_PW"] = os.path.join(masterdir,"spwp.txt")
        if myid == main_node:
            write_text_file(nxinit_pwsp,Tracker["nxinit_PW"])
        PW_dict[Tracker["constants"]["nnxo"]]   = Tracker["constants"]["PWadjustment"]
        PW_dict[Tracker["constants"]["nxinit"]] = Tracker["nxinit_PW"]
        Tracker["PW_dict"] = PW_dict
    mpi_barrier(MPI_COMM_WORLD)

    #----------------------- From two chunks to FSC, and low pass filter -----------------------
    for element in chunk_one:
        chunk_dict[element] = 0
    for element in chunk_two:
        chunk_dict[element] = 1
    chunk_list = [chunk_one, chunk_two]
    Tracker["chunk_dict"] = chunk_dict
    Tracker["P_chunk0"]   = len(chunk_one)/float(total_stack)
    Tracker["P_chunk1"]   = len(chunk_two)/float(total_stack)

    ### create two volumes to estimate resolution
    if myid == main_node:
        for index in xrange(2):
            write_text_file(chunk_list[index],os.path.join(masterdir,"chunk%01d.txt"%index))
    mpi_barrier(MPI_COMM_WORLD)
    vols = []
    for index in xrange(2):
        vol = reconstruct(Tracker["constants"]["nxinit"], os.path.join(masterdir,"chunk%01d.txt"%index))
        if myid == main_node:
            vol.write_image(os.path.join(masterdir, "vol%d.hdf"%index))
        vols.append(vol)
        mpi_barrier(MPI_COMM_WORLD)

    if myid == main_node:
        low_pass, falloff, currentres = get_resolution_mrk01(vols,Tracker["constants"]["radius"],Tracker["constants"]["nxinit"],masterdir,Tracker["mask3D"])
        # Clamp to the user cut-off only when one was given; -1.0 means
        # "not set" and must not overwrite the FSC-based estimate.
        user_low_pass = Tracker["constants"]["low_pass_filter"]
        if user_low_pass != -1.0 and low_pass > user_low_pass:
            low_pass = user_low_pass
    else:
        low_pass   = 0.0
        falloff    = 0.0
        currentres = 0.0
    # Fixed: the broadcast results were discarded, leaving 0.0 on every
    # node except the main one.
    currentres = bcast_number_to_all(currentres,source_node = main_node)
    low_pass   = bcast_number_to_all(low_pass,source_node = main_node)
    falloff    = bcast_number_to_all(falloff,source_node = main_node)
    Tracker["currentres"] = currentres
    Tracker["falloff"]    = falloff
    if Tracker["constants"]["low_pass_filter"] == -1.0:
        Tracker["low_pass_filter"] = min(.45,low_pass/Tracker["shrinkage"]) # no better than .45
    else:
        Tracker["low_pass_filter"] = min(.45,Tracker["constants"]["low_pass_filter"]/Tracker["shrinkage"])
    Tracker["lowpass"]    = Tracker["low_pass_filter"]
    Tracker["falloff"]    = .1
    Tracker["global_fsc"] = os.path.join(masterdir, "fsc.txt")

    from filter import filt_tanl
    if myid == main_node:
        log_main.add("The command-line inputs are as following:")
        log_main.add("**********************************************************")
        for a in sys.argv:
            log_main.add(a)
        log_main.add("number of cpus used in this run is %d"%Tracker["constants"]["nproc"])
        log_main.add("**********************************************************")
        ### START 3-D sorting
        log_main.add("----------3-D sorting program------- ")
        log_main.add("current resolution %6.3f for images of original size in terms of absolute frequency"%Tracker["currentres"])
        log_main.add("equivalent to %f Angstrom resolution"%(Tracker["constants"]["pixel_size"]/Tracker["currentres"]/Tracker["shrinkage"]))
        log_main.add("the user provided enforced low_pass_filter is %f"%Tracker["constants"]["low_pass_filter"])
        for index in xrange(2):
            filt_tanl(get_im(os.path.join(masterdir,"vol%01d.hdf"%index)), Tracker["low_pass_filter"],Tracker["falloff"]).write_image(os.path.join(masterdir, "volf%01d.hdf"%index))
    mpi_barrier(MPI_COMM_WORLD)

    from utilities import get_input_from_string
    from utilities import even_angles
    delta = get_input_from_string(Tracker["constants"]["delta"])
    delta = delta[0]
    n_angles = even_angles(delta, 0, 180)
    sampled = get_stat_proj(Tracker,delta,Tracker["constants"]["ali3d"])
    if myid == main_node:
        nc = 0
        for a in sampled:
            if len(sampled[a]) > 0:
                nc += 1
        log_main.add("total sampled direction %10d at angle step %6.3f"%(len(n_angles), delta))
        log_main.add("captured sampled directions %10d percentage covered by data %6.3f"%(nc,float(nc)/len(n_angles)*100))

    number_of_images_per_group = Tracker["constants"]["number_of_images_per_group"]
    if myid == main_node:
        log_main.add("user provided number_of_images_per_group %d"%number_of_images_per_group)
    Tracker["number_of_images_per_group"] = number_of_images_per_group
    number_of_groups = get_number_of_groups(total_stack,number_of_images_per_group)
    Tracker["number_of_groups"] = number_of_groups

    # ------------------------------ generation 0 ------------------------------
    generation = 0
    partition_dict = {}
    workdir = os.path.join(masterdir,"generation%03d"%generation)
    Tracker["this_dir"] = workdir
    if myid == main_node:
        log_main.add("---- generation %5d"%generation)
        log_main.add("number of images per group is set as %d"%number_of_images_per_group)
        log_main.add("the initial number of groups is %10d "%number_of_groups)
        cmd = "{} {}".format("mkdir",workdir)
        os.system(cmd)
    mpi_barrier(MPI_COMM_WORLD)
    list_to_be_processed = list(range(Tracker["constants"]["total_stack"]))
    Tracker["this_data_list"] = list_to_be_processed
    create_random_list(Tracker)

    full_dict = {}
    for iptl in xrange(Tracker["constants"]["total_stack"]):
        full_dict[iptl] = iptl
    Tracker["full_ID_dict"] = full_dict

    for indep_run in xrange(Tracker["constants"]["indep_runs"]):
        Tracker["this_particle_list"] = Tracker["this_indep_list"][indep_run]
        ref_vol = recons_mref(Tracker)
        if myid == main_node:
            log_main.add("independent run %10d"%indep_run)
        mpi_barrier(MPI_COMM_WORLD)
        Tracker["this_data_list"] = list_to_be_processed
        Tracker["total_stack"]    = len(Tracker["this_data_list"])
        Tracker["this_particle_text_file"] = os.path.join(workdir,"independent_list_%03d.txt"%indep_run) # for get_shrink_data
        if myid == main_node:
            write_text_file(Tracker["this_data_list"], Tracker["this_particle_text_file"])
        mpi_barrier(MPI_COMM_WORLD)
        outdir = os.path.join(workdir, "EQ_Kmeans%03d"%indep_run)
        ref_vol = apply_low_pass_filter(ref_vol,Tracker)
        mref_ali3d_EQ_Kmeans(ref_vol, outdir, Tracker["this_particle_text_file"], Tracker)
        partition_dict[indep_run] = Tracker["this_partition"]
    Tracker["partition_dict"]   = partition_dict
    Tracker["total_stack"]      = len(Tracker["this_data_list"])
    Tracker["this_total_stack"] = Tracker["total_stack"]

    do_two_way_comparison(Tracker)

    ref_vol_list = []
    number_of_ref_class = []
    for igrp in xrange(len(Tracker["two_way_stable_member"])):
        Tracker["this_data_list"]      = Tracker["two_way_stable_member"][igrp]
        Tracker["this_data_list_file"] = os.path.join(workdir,"stable_class%d.txt"%igrp)
        if myid == main_node:
            write_text_file(Tracker["this_data_list"], Tracker["this_data_list_file"])
        # Make sure the list file exists before other nodes read it (the
        # later generations already synchronize at this point).
        mpi_barrier(MPI_COMM_WORLD)
        volref = reconstruct(Tracker["nxinit"], Tracker["this_data_list_file"])
        ref_vol_list.append(volref)
        number_of_ref_class.append(len(Tracker["this_data_list"]))
        if myid == main_node:
            log_main.add("group %d members %d "%(igrp,len(Tracker["this_data_list"])))
    Tracker["number_of_ref_class"] = number_of_ref_class
    select_pw_adjustment(ref_vol_list[0].get_xsize())
    if myid == main_node:
        for iref in xrange(len(ref_vol_list)):
            volref = apply_user_function(ref_vol_list[iref])
            volref.write_image(os.path.join(workdir,"volf_stable.hdf"),iref)
    mpi_barrier(MPI_COMM_WORLD)

    Tracker["this_data_list"] = Tracker["this_accounted_list"]
    outdir = os.path.join(workdir,"Kmref")
    empty_group, res_groups, final_list = ali3d_mref_Kmeans_MPI(ref_vol_list,outdir,Tracker["this_accounted_text"],Tracker)
    Tracker["this_unaccounted_list"] = get_complementary_elements(list_to_be_processed,final_list)
    if myid == main_node:
        log_main.add("the number of particles not processed is %d"%len(Tracker["this_unaccounted_list"]))
        write_text_file(Tracker["this_unaccounted_list"],Tracker["this_unaccounted_text"])
    update_full_dict(Tracker["this_unaccounted_list"], Tracker)

    # Reconstruct each class at the original image size
    number_of_groups = len(res_groups)
    vol_list = []
    number_of_ref_class = []
    for igrp in xrange(number_of_groups):
        class_file = os.path.join(outdir,"Class%d.txt"%igrp)
        vol_list.append(reconstruct(Tracker["constants"]["nnxo"], class_file))
        if myid == main_node:
            npergroup = len(read_text_file(class_file))
        else:
            npergroup = 0
        npergroup = bcast_number_to_all(npergroup, main_node )
        number_of_ref_class.append(npergroup)
    Tracker["number_of_ref_class"] = number_of_ref_class
    mpi_barrier(MPI_COMM_WORLD)
    select_pw_adjustment(vol_list[0].get_xsize())
    if myid == main_node:
        for ivol in xrange(len(vol_list)):
            volref = apply_user_function(vol_list[ivol])
            volref.write_image(os.path.join(workdir,"volf_of_Classes.hdf"),ivol)
        log_main.add("number of unaccounted particles %10d"%len(Tracker["this_unaccounted_list"]))
        log_main.add("number of accounted particles %10d"%len(Tracker["this_accounted_list"]))

    # reset parameters for the next round calculation
    Tracker["this_data_list"]   = Tracker["this_unaccounted_list"]
    Tracker["total_stack"]      = len(Tracker["this_unaccounted_list"])
    Tracker["this_total_stack"] = Tracker["total_stack"]
    number_of_groups = get_number_of_groups(len(Tracker["this_unaccounted_list"]),number_of_images_per_group)
    Tracker["number_of_groups"] = number_of_groups

    # --------------------------- later generations ----------------------------
    while number_of_groups >= 2:
        generation += 1
        partition_dict = {}
        workdir = os.path.join(masterdir,"generation%03d"%generation)
        Tracker["this_dir"] = workdir
        if myid == main_node:
            log_main.add("*********************************************")
            log_main.add("----- generation %5d "%generation)
            log_main.add("number of images per group is set as %10d "%number_of_images_per_group)
            log_main.add("the number of groups is %10d "%number_of_groups)
            log_main.add(" number of particles for clustering is %10d"%Tracker["total_stack"])
            cmd = "{} {}".format("mkdir",workdir)
            os.system(cmd)
        mpi_barrier(MPI_COMM_WORLD)
        create_random_list(Tracker)
        for indep_run in xrange(Tracker["constants"]["indep_runs"]):
            Tracker["this_particle_list"] = Tracker["this_indep_list"][indep_run]
            ref_vol = recons_mref(Tracker)
            if myid == main_node:
                log_main.add("independent run %10d"%indep_run)
            outdir = os.path.join(workdir, "EQ_Kmeans%03d"%indep_run)
            Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
            mref_ali3d_EQ_Kmeans(ref_vol,outdir,Tracker["this_unaccounted_text"],Tracker)
            partition_dict[indep_run] = Tracker["this_partition"]
            Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
            Tracker["total_stack"]    = len(Tracker["this_unaccounted_list"])
        Tracker["partition_dict"]   = partition_dict
        Tracker["this_total_stack"] = Tracker["total_stack"]
        total_list_of_this_run      = Tracker["this_unaccounted_list"]

        do_two_way_comparison(Tracker)

        ref_vol_list = []
        number_of_ref_class = []
        for igrp in xrange(len(Tracker["two_way_stable_member"])):
            Tracker["this_data_list"]      = Tracker["two_way_stable_member"][igrp]
            Tracker["this_data_list_file"] = os.path.join(workdir,"stable_class%d.txt"%igrp)
            if myid == main_node:
                write_text_file(Tracker["this_data_list"], Tracker["this_data_list_file"])
            mpi_barrier(MPI_COMM_WORLD)
            volref = reconstruct(Tracker["constants"]["nxinit"], Tracker["this_data_list_file"])
            if myid == main_node:
                # Fixed: the image index was a stale loop variable (iref) left
                # over from generation 0; each group gets its own slot.
                volref.write_image(os.path.join(workdir,"vol_stable.hdf"),igrp)
            ref_vol_list.append(volref)
            number_of_ref_class.append(len(Tracker["this_data_list"]))
            mpi_barrier(MPI_COMM_WORLD)
        Tracker["number_of_ref_class"] = number_of_ref_class
        Tracker["this_data_list"]      = Tracker["this_accounted_list"]
        outdir = os.path.join(workdir,"Kmref")
        empty_group, res_groups, final_list = ali3d_mref_Kmeans_MPI(ref_vol_list,outdir,Tracker["this_accounted_text"],Tracker)

        # calculate the 3-D structure of original image size for each group
        number_of_groups = len(res_groups)
        Tracker["this_unaccounted_list"] = get_complementary_elements(total_list_of_this_run,final_list)
        if myid == main_node:
            log_main.add("the number of particles not processed is %d"%len(Tracker["this_unaccounted_list"]))
            write_text_file(Tracker["this_unaccounted_list"],Tracker["this_unaccounted_text"])
        mpi_barrier(MPI_COMM_WORLD)
        update_full_dict(Tracker["this_unaccounted_list"],Tracker)
        vol_list = []
        for igrp in xrange(number_of_groups):
            vol_list.append(reconstruct(Tracker["constants"]["nnxo"], os.path.join(outdir,"Class%d.txt"%igrp)))
        mpi_barrier(MPI_COMM_WORLD)
        # Fixed: the table must match the volumes that are filtered below
        # (original-size class volumes, as in generation 0), not the
        # nxinit-size stable references.
        if vol_list:
            select_pw_adjustment(vol_list[0].get_xsize())
        if myid == main_node:
            for ivol in xrange(len(vol_list)):
                volref = apply_user_function(vol_list[ivol])
                volref.write_image(os.path.join(workdir, "volf_of_Classes.hdf"),ivol)
            log_main.add("number of unaccounted particles %10d"%len(Tracker["this_unaccounted_list"]))
            log_main.add("number of accounted particles %10d"%len(Tracker["this_accounted_list"]))
        del vol_list
        mpi_barrier(MPI_COMM_WORLD)
        number_of_groups = get_number_of_groups(len(Tracker["this_unaccounted_list"]),number_of_images_per_group)
        Tracker["number_of_groups"] = number_of_groups
        Tracker["this_data_list"]   = Tracker["this_unaccounted_list"]
        Tracker["total_stack"]      = len(Tracker["this_unaccounted_list"])

    if Tracker["constants"]["unaccounted"]:
        volref = reconstruct(Tracker["constants"]["nnxo"], Tracker["this_unaccounted_text"])
        select_pw_adjustment(volref.get_xsize())
        if myid == main_node:
            volref = apply_user_function(volref)
            volref.write_image(os.path.join(workdir,"volf_unaccounted.hdf"))

    # Finish program
    if myid == main_node:
        log_main.add("sxsort3d finishes")
    mpi_barrier(MPI_COMM_WORLD)
    mpi_finalize()
    exit()
def calculate_volumes_after_rotation_and_save_them(ali3d_options, rviper_iter, masterdir, bdb_stack_location, mpi_rank, mpi_size,
        no_of_viper_runs_analyzed_together, no_of_viper_runs_analyzed_together_from_user_options, mpi_comm = -1):
    """
    Reconstruct one volume per selected VIPER run, then align and average them.

    The list of run indices is read from
    "list_of_viper_runs_included_in_outlier_elimination.json" in the main
    output directory of iteration `rviper_iter`.  For every listed run a volume
    is reconstructed with do_volume() and written by rank 0 to
    "<run dir>/rotated_volume.hdf".  Rank 0 then repeatedly aligns the volumes
    to their running average and finally writes "average_volume.hdf" and
    "variance_volume.hdf" to the main output directory.

    The function returns immediately if "rotated_volume.hdf" already exists
    for every listed run.  If the list is empty it prints an error, finalizes
    MPI and exits the process.

    Parameters:
        ali3d_options       - passed unchanged to do_volume()
        rviper_iter         - iteration number; selects the main output
                              directory and (as rviper_iter - 1) the stack suffix
        masterdir           - root directory under which the main output
                              directory lives
        bdb_stack_location  - stack name prefix; "_%03d" % (rviper_iter - 1)
                              is appended before it is given to getindexdata()
        mpi_rank, mpi_size  - rank of this process and number of processes
                              (presumably relative to mpi_comm; confirm with caller)
        no_of_viper_runs_analyzed_together,
        no_of_viper_runs_analyzed_together_from_user_options
                            - not used by the current code; kept so that
                              existing callers keep working
        mpi_comm            - communicator; -1 selects MPI_COMM_WORLD

    Returns None.
    """
    import json

    if mpi_comm == -1:
        mpi_comm = MPI_COMM_WORLD

    mainoutputdir = masterdir + DIR_DELIM + NAME_OF_MAIN_DIR + ("%03d" + DIR_DELIM) % (rviper_iter)

    # The context manager guarantees the file is closed even if json.load() raises.
    with open(mainoutputdir + "list_of_viper_runs_included_in_outlier_elimination.json", 'r') as f:
        run_indices = json.load(f)

    if len(run_indices) == 0:
        print("Error: len(list_of_independent_viper_run_indices_used_for_outlier_elimination)==0")
        mpi_finalize()
        sys.exit()

    def rotated_volume_path(run_index):
        # Location of the reconstructed volume belonging to one VIPER run.
        return mainoutputdir + DIR_DELIM + NAME_OF_RUN_DIR + "%03d" % (run_index) + DIR_DELIM + "rotated_volume.hdf"

    # If this analysis step was already performed in the past (every volume
    # is on disk) there is nothing left to do.
    if all(os.path.exists(rotated_volume_path(run_index)) for run_index in run_indices):
        return

    # One parameter file per run, in the same order as run_indices.
    partstack = [mainoutputdir + NAME_OF_RUN_DIR + "%03d" % (run_index) + DIR_DELIM + "rotated_reduced_params.txt"
                 for run_index in run_indices]
    partids_file_name = mainoutputdir + "this_iteration_index_keep_images.txt"

    n_projs = len([int(partid) for partid in read_text_file(partids_file_name)])

    def reconstruct_and_save(comm, comm_size, message_suffix):
        # Reconstruct the volume of every selected run on communicator `comm`;
        # rank 0 writes each volume to disk and reports it on stdout.
        for idx, run_index in enumerate(run_indices):
            projdata = getindexdata(bdb_stack_location + "_%03d" % (rviper_iter - 1), partids_file_name, partstack[idx], mpi_rank, comm_size)
            vol = do_volume(projdata, ali3d_options, 0, mpi_comm = comm)
            del projdata
            if mpi_rank == 0:
                vol.write_image(rotated_volume_path(run_index))
                line = strftime("%Y-%m-%d_%H:%M:%S", localtime()) + " => "
                print(line + "Generated rec_ref_volume_run #%01d" % run_index + message_suffix)
            del vol

    if mpi_size > n_projs:
        # More processes than images: only the first n_projs ranks take part
        # in the reconstruction, on their own sub-communicator.
        working = int(not (mpi_rank < n_projs))
        mpi_subcomm = mpi_comm_split(mpi_comm, working, mpi_rank - working*n_projs)
        mpi_subsize = mpi_comm_size(mpi_subcomm)
        if mpi_rank < n_projs:
            reconstruct_and_save(mpi_subcomm, mpi_subsize, " \n")
        # Every rank of mpi_comm, working or idle, meets here.
        mpi_barrier(mpi_comm)
    else:
        reconstruct_and_save(mpi_comm, mpi_size, "")

    if mpi_rank == 0:
        # Align all rotated volumes, calculate their average and save as an overall result
        from utilities import get_params3D, set_params3D, get_im, model_circle
        from statistics import ave_var
        from applications import ali_vol

        vls = []
        for run_index in run_indices:
            volume = get_im(rotated_volume_path(run_index))
            set_params3D(volume, [0., 0., 0., 0., 0., 0., 0, 1.0])
            vls.append(volume)
        asa, sas = ave_var(vls)

        # do the alignment
        nx = asa.get_xsize()
        radius = nx/2 - .5
        st = Util.infomask(asa*asa, model_circle(radius, nx, nx, nx), True)
        goal = st[0]
        going = True
        # Keep re-aligning to the updated average for as long as the first
        # statistic of the squared average under the mask keeps increasing.
        while going:
            set_params3D(asa, [0., 0., 0., 0., 0., 0., 0, 1.0])
            for volume in vls:
                o = ali_vol(volume, asa, 7.0, 5., radius)  # range of angles and shifts, maybe should be adjusted
                p = get_params3D(o)
                del o
                set_params3D(volume, p)
            asa, sas = ave_var(vls)
            st = Util.infomask(asa*asa, model_circle(radius, nx, nx, nx), True)
            if st[0] > goal:
                goal = st[0]
            else:
                going = False
        # over and out
        asa.write_image(mainoutputdir + DIR_DELIM + "average_volume.hdf")
        sas.write_image(mainoutputdir + DIR_DELIM + "variance_volume.hdf")

    return
def do_volume_mrk02(ref_data):
    """
    Reconstruct (if needed), mask, filter and broadcast a reference volume.

    ref_data is a list:
        0 - data: a list of projections (scattered between cpus) or a volume.
            If it is a volume, only the volume processing is done.
        1 - Tracker: dictionary of settings, the same for all cpus.  Keys read
            here: "constants" (CTF, snr, sym, npad, mask3D, radius, nnxo,
            sausage, pixel_size), "smearstep", "PWadjustment", "upscale",
            "lowpass", "falloff" and, optionally, "local_filter".
        2 - iter: iteration number; only referenced by commented-out debug code.
        3 - mpi_comm: communicator; None selects MPI_COMM_WORLD.

    Masking and filtering are done on the process with myid == 0 and the
    result is broadcast, so the returned volume is the same for all cpus.
    """
    from EMAN2 import Util
    from mpi import mpi_comm_rank, mpi_comm_size, MPI_COMM_WORLD
    from filter import filt_table
    from reconstruction import recons3d_4nn_MPI, recons3d_4nn_ctf_MPI
    from utilities import bcast_EMData_to_all, bcast_number_to_all, model_blank
    from fundamentals import rops_table, fftip, fft
    import types

    # Retrieve the function specific input arguments from ref_data
    data = ref_data[0]
    Tracker = ref_data[1]
    # NOTE(review): `iter` shadows the builtin and is not used by live code.
    iter = ref_data[2]
    mpi_comm = ref_data[3]

    # # For DEBUG
    # print "Type of data %s" % (type(data))
    # print "Type of Tracker %s" % (type(Tracker))
    # print "Type of iter %s" % (type(iter))
    # print "Type of mpi_comm %s" % (type(mpi_comm))

    if (mpi_comm == None):
        mpi_comm = MPI_COMM_WORLD
    myid = mpi_comm_rank(mpi_comm)
    nproc = mpi_comm_size(mpi_comm)

    # "local_filter" is optional in Tracker; a missing key disables local filtering.
    try:
        local_filter = Tracker["local_filter"]
    except:
        local_filter = False
    #=========================================================================
    # volume reconstruction
    if (type(data) == types.ListType):
        # A list is treated as projection data: reconstruct with or without
        # CTF correction, depending on Tracker["constants"]["CTF"].
        if Tracker["constants"]["CTF"]:
            vol = recons3d_4nn_ctf_MPI(myid, data, Tracker["constants"]["snr"], \
                symmetry=Tracker["constants"]["sym"], npad=Tracker["constants"]["npad"], mpi_comm=mpi_comm, smearstep = Tracker["smearstep"])
        else:
            vol = recons3d_4nn_MPI (myid, data,\
                symmetry=Tracker["constants"]["sym"], npad=Tracker["constants"]["npad"], mpi_comm=mpi_comm)
    else:
        # Anything else is taken to be an already reconstructed volume.
        vol = data

    if myid == 0:
        from morphology import threshold
        from filter import filt_tanl, filt_btwl
        from utilities import model_circle, get_im
        import types
        nx = vol.get_xsize()
        # Select the 3D mask: None -> mask from model_circle with the radius
        # rescaled from nnxo to nx, "auto" -> adaptive_mask(vol),
        # otherwise a file name or an image object supplied by the user.
        if (Tracker["constants"]["mask3D"] == None):
            mask3D = model_circle(
                int(Tracker["constants"]["radius"] * float(nx) /
                    float(Tracker["constants"]["nnxo"]) + 0.5), nx, nx, nx)
        elif (Tracker["constants"]["mask3D"] == "auto"):
            from utilities import adaptive_mask
            mask3D = adaptive_mask(vol)
        else:
            if (type(Tracker["constants"]["mask3D"]) == types.StringType):
                mask3D = get_im(Tracker["constants"]["mask3D"])
            else:
                # Work on a copy so the mask stored in Tracker is not modified.
                mask3D = (Tracker["constants"]["mask3D"]).copy()
            nxm = mask3D.get_xsize()
            if (nx != nxm):
                # Bring a user mask of a different size to the size of the volume.
                from fundamentals import rot_shift3D
                mask3D = Util.window(
                    rot_shift3D(mask3D, scale=float(nx) / float(nxm)), nx, nx,
                    nx)
                nxm = mask3D.get_xsize()
                assert (nx == nxm)

        # Normalize the volume using statistics computed under the mask
        # (stat[0] and stat[1] are presumably the mean and the standard
        # deviation - confirm against Util.infomask), then threshold and mask.
        stat = Util.infomask(vol, mask3D, False)
        vol -= stat[0]
        Util.mul_scalar(vol, 1.0 / stat[1])
        vol = threshold(vol)
        Util.mul_img(vol, mask3D)
        if (Tracker["PWadjustment"]):
            # Tracker["PWadjustment"] is used as the name of a text file with
            # the reference table rt read below.
            from utilities import read_text_file, write_text_file
            rt = read_text_file(Tracker["PWadjustment"])
            fftip(vol)
            ro = rops_table(vol)
            #  Here unless I am mistaken it is enough to take the beginning of the reference pw.
            # Element 0 of ro is left as returned by rops_table.
            for i in xrange(1, len(ro)):
                ro[i] = (rt[i] / ro[i])**Tracker["upscale"]
            #write_text_file(rops_table(filt_table( vol, ro),1),"foo.txt")
            if Tracker["constants"]["sausage"]:
                # Extra weighting: two Gaussian bumps centred at 0.10 and
                # 0.215 in units of i/ny/pixel_size, each of width 0.025.
                ny = vol.get_ysize()
                y = float(ny)
                from math import exp
                for i in xrange(len(ro)):
                    ro[i] *= \
                      (1.0+1.0*exp(-(((i/y/Tracker["constants"]["pixel_size"])-0.10)/0.025)**2)+1.0*exp(-(((i/y/Tracker["constants"]["pixel_size"])-0.215)/0.025)**2))

            if local_filter:
                # skip low-pass filtration
                vol = fft(filt_table(vol, ro))
            else:
                # Tracker["lowpass"] is either a filter table (list) or a
                # cut-off frequency used together with Tracker["falloff"].
                if (type(Tracker["lowpass"]) == types.ListType):
                    vol = fft(
                        filt_table(filt_table(vol, Tracker["lowpass"]), ro))
                else:
                    vol = fft(
                        filt_table(
                            filt_tanl(vol, Tracker["lowpass"],
                                      Tracker["falloff"]), ro))
            del ro
        else:
            if Tracker["constants"]["sausage"]:
                ny = vol.get_ysize()
                y = float(ny)
                ro = [0.0] * (ny // 2 + 2)
                from math import exp
                for i in xrange(len(ro)):
                    ro[i] = \
                      (1.0+1.0*exp(-(((i/y/Tracker["constants"]["pixel_size"])-0.10)/0.025)**2)+1.0*exp(-(((i/y/Tracker["constants"]["pixel_size"])-0.215)/0.025)**2))
                fftip(vol)
                # NOTE(review): the return value of filt_table is discarded
                # here; unless filt_table modifies vol in place, the weighting
                # in ro is not applied on this path - confirm.
                filt_table(vol, ro)
                del ro
            if not local_filter:
                if (type(Tracker["lowpass"]) == types.ListType):
                    vol = filt_table(vol, Tracker["lowpass"])
                else:
                    vol = filt_tanl(vol, Tracker["lowpass"],
                                    Tracker["falloff"])
            # fftip() was applied above only in the sausage case, so that is
            # the only case in which the volume is transformed back here.
            if Tracker["constants"]["sausage"]:
                vol = fft(vol)

    if local_filter:
        from morphology import binarize
        # Only myid == 0 holds the mask; its size is sent to everybody.
        # NOTE(review): no communicator is passed to bcast_number_to_all,
        # unlike bcast_EMData_to_all below - confirm this is intended when
        # mpi_comm is not MPI_COMM_WORLD.
        if (myid == 0):
            nx = mask3D.get_xsize()
        else:
            nx = 0
        nx = bcast_number_to_all(nx, source_node=0)
        #  only main processor needs the two input volumes
        if (myid == 0):
            mask = binarize(mask3D, 0.5)
            # Here Tracker["local_filter"] is used as the name of an image file.
            locres = get_im(Tracker["local_filter"])
            lx = locres.get_xsize()
            if (lx != nx):
                if (lx < nx):
                    # Bring mask and volume down to the size of locres.
                    from fundamentals import fdecimate, rot_shift3D
                    mask = Util.window(
                        rot_shift3D(mask, scale=float(lx) / float(nx)), lx, lx,
                        lx)
                    vol = fdecimate(vol, lx, lx, lx)
                else:
                    ERROR("local filter cannot be larger than input volume",
                          "user function", 1)
            stat = Util.infomask(vol, mask, False)
            vol -= stat[0]
            Util.mul_scalar(vol, 1.0 / stat[1])
        else:
            # Placeholders on the other processes.
            lx = 0
            locres = model_blank(1, 1, 1)
            vol = model_blank(1, 1, 1)
        lx = bcast_number_to_all(lx, source_node=0)
        if (myid != 0):
            mask = model_blank(lx, lx, lx)
        bcast_EMData_to_all(mask, myid, 0, comm=mpi_comm)
        from filter import filterlocal
        # filterlocal is called by every process (myid, root 0 and nproc are passed in).
        vol = filterlocal(locres, vol, mask, Tracker["falloff"], myid, 0,
                          nproc)

        if myid == 0:
            if (lx < nx):
                # Restore the original volume size after filtering at size lx.
                from fundamentals import fpol
                vol = fpol(vol, nx, nx, nx)
            vol = threshold(vol)
            vol = filt_btwl(vol, 0.38, 0.5)  #  This will have to be corrected.
            Util.mul_img(vol, mask3D)
            del mask3D
            # vol.write_image('toto%03d.hdf'%iter)
        else:
            # Buffer of the final size that receives the broadcast below.
            vol = model_blank(nx, nx, nx)
    else:
        if myid == 0:
            #from utilities import write_text_file
            #write_text_file(rops_table(vol,1),"goo.txt")
            # Final normalization, thresholding, filtering and masking.
            stat = Util.infomask(vol, mask3D, False)
            vol -= stat[0]
            Util.mul_scalar(vol, 1.0 / stat[1])
            vol = threshold(vol)
            vol = filt_btwl(vol, 0.38, 0.5)  #  This will have to be corrected.
            Util.mul_img(vol, mask3D)
            del mask3D
            # vol.write_image('toto%03d.hdf'%iter)
    # broadcast volume
    bcast_EMData_to_all(vol, myid, 0, comm=mpi_comm)
    #=========================================================================
    return vol
def dovolume(ref_data):
    """
    Prepare the reference in 3D alignment; this function corresponds to what
    do_volume does.

    Input: list ref_data
        0 - mask
        1 - center flag (the volume is centered when it equals 1)
        2 - raw average
        3 - fsc result (not read by this function)

    Output: tuple (vol, cs)
        vol - filtered, masked and, if requested, centered reference image
        cs  - center found by phase_cog() that was used for the shift, or
              [0.0, 0.0, 0.0] when no centering was requested

    Side effects: increments the module-level ref_ali2d_counter and reports
    progress through print_msg().  Filter parameters are taken from
    "flaa.txt" and the reference power spectrum from "pwreference.txt" when
    these files can be read; otherwise defaults are used.
    """
    from utilities import print_msg, read_text_row, read_text_file
    from filter import filt_tanl, filt_table, filt_btwl
    from fundamentals import fshift, rops_table, fftip, fft
    from morphology import threshold

    global ref_ali2d_counter
    ref_ali2d_counter += 1

    mask = ref_data[0]
    raw_average = ref_data[2]

    goal = raw_average.cmp("dot", raw_average, {
        "negative": 0,
        "mask": mask
    })
    print_msg("do_volume user function Step = %5d GOAL = %10.3e\n" %
              (ref_ali2d_counter, goal))

    # Normalize under the mask before filtering.
    stat = Util.infomask(raw_average, mask, False)
    vol = raw_average - stat[0]
    Util.mul_scalar(vol, 1.0 / stat[1])
    vol = threshold(vol)
    #Util.mul_img(vol, ref_data[0])

    # Filter parameters are optional: fall back to defaults when "flaa.txt"
    # is missing or malformed.  Only Exception is caught so that Ctrl-C and
    # SystemExit are not swallowed.
    try:
        row = read_text_row("flaa.txt")[0]
        fl, aa = row[0], row[1]
    except Exception:
        fl, aa = 0.4, 0.2
    msg = "Tangent filter: cut-off frequency = %10.3f fall-off = %10.3f\n" % (
        fl, aa)
    print_msg(msg)

    fftip(vol)
    # Power spectrum adjustment is best effort: without a usable
    # "pwreference.txt" only the tangent low-pass filter is applied.
    try:
        rt = read_text_file("pwreference.txt")
        ro = rops_table(vol)
        #  Here unless I am mistaken it is enough to take the beginning of the reference pw.
        for i in range(1, len(ro)):
            ro[i] = (rt[i] / ro[i])**0.5
        vol = fft(filt_table(filt_tanl(vol, fl, aa), ro))
        msg = "Power spectrum adjusted\n"
        print_msg(msg)
    except Exception:
        vol = fft(filt_tanl(vol, fl, aa))

    stat = Util.infomask(vol, mask, False)
    vol -= stat[0]
    Util.mul_scalar(vol, 1.0 / stat[1])
    vol = threshold(vol)
    vol = filt_btwl(vol, 0.38, 0.5)
    Util.mul_img(vol, mask)

    if ref_data[1] == 1:
        # Center the volume that is actually returned.  The previous code
        # referred to an undefined name `volf` here, which raised NameError
        # whenever centering was requested.
        cs = vol.phase_cog()
        msg = "Center x = %10.3f Center y = %10.3f Center z = %10.3f\n" % (
            cs[0], cs[1], cs[2])
        print_msg(msg)
        vol = fshift(vol, -cs[0], -cs[1], -cs[2])
    else:
        cs = [0.0] * 3
    return vol, cs
def main(): import sys import os import math import random import pyemtbx.options import time from random import random, seed, randint from optparse import OptionParser progname = os.path.basename(sys.argv[0]) usage = progname + """ [options] <inputfile> <outputfile> Generic 2-D image processing programs. Functionality: 1. Phase flip a stack of images and write output to new file: sxprocess.py input_stack.hdf output_stack.hdf --phase_flip 2. Resample (decimate or interpolate up) images (2D or 3D) in a stack to change the pixel size. The window size will change accordingly. sxprocess input.hdf output.hdf --changesize --ratio=0.5 3. Compute average power spectrum of a stack of 2D images with optional padding (option wn) with zeroes or a 3-D volume. sxprocess.py input_stack.hdf powerspectrum.hdf --pw [--wn=1024] 4. Generate a stack of projections bdb:data and micrographs with prefix mic (i.e., mic0.hdf, mic1.hdf etc) from structure input_structure.hdf, with CTF applied to both projections and micrographs: sxprocess.py input_structure.hdf data mic --generate_projections format="bdb":apix=5.2:CTF=True:boxsize=64 5. Retrieve original image numbers in the selected ISAC group (here group 12 from generation 3): sxprocess.py bdb:test3 class_averages_generation_3.hdf list3_12.txt --isacgroup=12 --params=originalid 6. Retrieve original image numbers of images listed in ISAC output stack of averages: sxprocess.py select1.hdf ohk.txt 7. Adjust rotationally averaged power spectrum of an image to that of a reference image or a reference 1D power spectrum stored in an ASCII file. Optionally use a tangent low-pass filter. Also works for a stack of images, in which case the output is also a stack. sxprocess.py vol.hdf ref.hdf avol.hdf < 0.25 0.2> --adjpw sxprocess.py vol.hdf pw.txt avol.hdf < 0.25 0.2> --adjpw 8. Generate a 1D rotationally averaged power spectrum of an image. 
sxprocess.py vol.hdf --rotwp=rotpw.txt # Output will contain three columns: (1) rotationally averaged power spectrum (2) logarithm of the rotationally averaged power spectrum (3) integer line number (from zero to approximately to half the image size) 9. Apply 3D transformation (rotation and/or shift) to a set of orientation parameters associated with projection data. sxprocess.py --transfromparams=phi,theta,psi,tx,ty,tz input.txt output.txt The output file is then imported and 3D transformed volume computed: sxheader.py bdb:p --params=xform.projection --import=output.txt mpirun -np 2 sxrecons3d_n.py bdb:p tvol.hdf --MPI The reconstructed volume is in the position of the volume computed using the input.txt parameters and then transformed with rot_shift3D(vol, phi,theta,psi,tx,ty,tz) 10. Import ctf parameters from the output of sxcter into windowed particle headers. There are three possible input files formats: (1) all particles are in one stack, (2 aor 3) particles are in stacks, each stack corresponds to a single micrograph. In each case the particles should contain a name of the micrograph of origin stores using attribute name 'ptcl_source_image'. Normally this is done by e2boxer.py during windowing. Particles whose defocus or astigmatism error exceed set thresholds will be skipped, otherwise, virtual stacks with the original way preceded by G will be created. sxprocess.py --input=bdb:data --importctf=outdir/partres --defocuserror=10.0 --astigmatismerror=5.0 # Output will be a vritual stack bdb:Gdata sxprocess.py --input="bdb:directory/stacks*" --importctf=outdir/partres --defocuserror=10.0 --astigmatismerror=5.0 To concatenate output files: cd directory e2bdb.py . --makevstack=bdb:allparticles --filt=G IMPORTANT: Please do not move (or remove!) any input/intermediate EMAN2DB files as the information is linked between them. 11. Scale 3D shifts. 
The shifts in the input five columns text file with 3D orientation parameters will be DIVIDED by the scale factor sxprocess.py orientationparams.txt scaledparams.txt scale=0.5 12. Generate 3D mask from a given 3-D volume automatically or using threshold provided by user. 13. Postprocess 3-D or 2-D images: for 3-D volumes: calculate FSC with provided mask; weight summed volume with FSC; estimate B-factor from FSC weighted summed two volumes; apply negative B-factor to the weighted volume. for 2-D images: calculate B-factor and apply negative B-factor to 2-D images. 14. Winow stack file -reduce size of images without changing the pixel size. """ parser = OptionParser(usage,version=SPARXVERSION) parser.add_option("--order", action="store_true", help="Two arguments are required: name of input stack and desired name of output stack. The output stack is the input stack sorted by similarity in terms of cross-correlation coefficent.", default=False) parser.add_option("--order_lookup", action="store_true", help="Test/Debug.", default=False) parser.add_option("--order_metropolis", action="store_true", help="Test/Debug.", default=False) parser.add_option("--order_pca", action="store_true", help="Test/Debug.", default=False) parser.add_option("--initial", type="int", default=-1, help="Specifies which image will be used as an initial seed to form the chain. 
(default = 0, means the first image)") parser.add_option("--circular", action="store_true", help="Select circular ordering (fisr image has to be similar to the last", default=False) parser.add_option("--radius", type="int", default=-1, help="Radius of a circular mask for similarity based ordering") parser.add_option("--changesize", action="store_true", help="resample (decimate or interpolate up) images (2D or 3D) in a stack to change the pixel size.", default=False) parser.add_option("--ratio", type="float", default=1.0, help="The ratio of new to old image size (if <1 the pixel size will increase and image size decrease, if>1, the other way round") parser.add_option("--pw", action="store_true", help="compute average power spectrum of a stack of 2-D images with optional padding (option wn) with zeroes", default=False) parser.add_option("--wn", type="int", default=-1, help="Size of window to use (should be larger/equal than particle box size, default padding to max(nx,ny))") parser.add_option("--phase_flip", action="store_true", help="Phase flip the input stack", default=False) parser.add_option("--makedb", metavar="param1=value1:param2=value2", type="string", action="append", help="One argument is required: name of key with which the database will be created. Fill in database with parameters specified as follows: --makedb param1=value1:param2=value2, e.g. 'gauss_width'=1.0:'pixel_input'=5.2:'pixel_output'=5.2:'thr_low'=1.0") parser.add_option("--generate_projections", metavar="param1=value1:param2=value2", type="string", action="append", help="Three arguments are required: name of input structure from which to generate projections, desired name of output projection stack, and desired prefix for micrographs (e.g. if prefix is 'mic', then micrographs mic0.hdf, mic1.hdf etc will be generated). 
Optional arguments specifying format, apix, box size and whether to add CTF effects can be entered as follows after --generate_projections: format='bdb':apix=5.2:CTF=True:boxsize=100, or format='hdf', etc., where format is bdb or hdf, apix (pixel size) is a float, CTF is True or False, and boxsize denotes the dimension of the box (assumed to be a square). If an optional parameter is not specified, it will default as follows: format='bdb', apix=2.5, CTF=False, boxsize=64.") parser.add_option("--isacgroup", type="int", help="Retrieve original image numbers in the selected ISAC group. See ISAC documentation for details.", default=-1) parser.add_option("--isacselect", action="store_true", help="Retrieve original image numbers of images listed in ISAC output stack of averages. See ISAC documentation for details.", default=False) parser.add_option("--params", type="string", default=None, help="Name of header of parameter, which one depends on specific option") parser.add_option("--adjpw", action="store_true", help="Adjust rotationally averaged power spectrum of an image", default=False) parser.add_option("--rotpw", type="string", default=None, help="Name of the text file to contain rotationally averaged power spectrum of the input image.") parser.add_option("--transformparams", type="string", default=None, help="Transform 3D projection orientation parameters using six 3D parameters (phi, theta,psi,sx,sy,sz). Input: --transformparams=45.,66.,12.,-2,3,-5.5 desired six transformation of the reconstructed structure. 
Output: file with modified orientation parameters.") # import ctf estimates done using cter parser.add_option("--input", type="string", default= None, help="Input particles.") parser.add_option("--importctf", type="string", default= None, help="Name of the file containing CTF parameters produced by sxcter.") parser.add_option("--defocuserror", type="float", default=1000000.0, help="Exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocuserror percent. The error is computed as (std dev defocus)/defocus*100%") parser.add_option("--astigmatismerror", type="float", default=360.0, help="Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatismerror degrees.") # import ctf estimates done using cter parser.add_option("--scale", type="float", default=-1.0, help="Divide shifts in the input 3D orientation parameters text file by the scale factor.") # generate adaptive mask from an given 3-D volume parser.add_option("--adaptive_mask", action="store_true", help="create adavptive 3-D mask from a given volume", default=False) parser.add_option("--nsigma", type="float", default= 1., help="number of times of sigma of the input volume to obtain the the large density cluster") parser.add_option("--ndilation", type="int", default= 3, help="number of times of dilation applied to the largest cluster of density") parser.add_option("--kernel_size", type="int", default= 11, help="convolution kernel for smoothing the edge of the mask") parser.add_option("--gauss_standard_dev", type="int", default= 9, help="stanadard deviation value to generate Gaussian edge") parser.add_option("--threshold", type="float", default= 9999., help="threshold provided by user to binarize input volume") parser.add_option("--ne", type="int", default= 0, help="number of times to erode the binarized input image") parser.add_option("--nd", type="int", default= 0, help="number of times to dilate the binarized input 
image") parser.add_option("--postprocess", action="store_true", help="postprocess unfiltered odd, even 3-D volumes",default=False) parser.add_option("--fsc_weighted", action="store_true", help="postprocess unfiltered odd, even 3-D volumes") parser.add_option("--low_pass_filter", action="store_true", default=False, help="postprocess unfiltered odd, even 3-D volumes") parser.add_option("--ff", type="float", default=.25, help="low pass filter stop band frequency in absolute unit") parser.add_option("--aa", type="float", default=.1, help="low pass filter falloff" ) parser.add_option("--mask", type="string", help="input mask file", default=None) parser.add_option("--output", type="string", help="output file name", default="postprocessed.hdf") parser.add_option("--pixel_size", type="float", help="pixel size of the data", default=1.0) parser.add_option("--B_start", type="float", help="starting frequency in Angstrom for B-factor estimation", default=10.) parser.add_option("--FSC_cutoff", type="float", help="stop frequency in Angstrom for B-factor estimation", default=0.143) parser.add_option("--2d", action="store_true", help="postprocess isac 2-D averaged images",default=False) parser.add_option("--window_stack", action="store_true", help="window stack images using a smaller window size", default=False) parser.add_option("--box", type="int", default= 0, help="the new window size ") (options, args) = parser.parse_args() global_def.BATCH = True if options.phase_flip: nargs = len(args) if nargs != 2: print "must provide name of input and output file!" return from EMAN2 import Processor instack = args[0] outstack = args[1] nima = EMUtil.get_image_count(instack) from filter import filt_ctf for i in xrange(nima): img = EMData() img.read_image(instack, i) try: ctf = img.get_attr('ctf') except: print "no ctf information in input stack! Exiting..." 
return dopad = True sign = 1 binary = 1 # phase flip assert img.get_ysize() > 1 dict = ctf.to_dict() dz = dict["defocus"] cs = dict["cs"] voltage = dict["voltage"] pixel_size = dict["apix"] b_factor = dict["bfactor"] ampcont = dict["ampcont"] dza = dict["dfdiff"] azz = dict["dfang"] if dopad and not img.is_complex(): ip = 1 else: ip = 0 params = {"filter_type": Processor.fourier_filter_types.CTF_, "defocus" : dz, "Cs": cs, "voltage": voltage, "Pixel_size": pixel_size, "B_factor": b_factor, "amp_contrast": ampcont, "dopad": ip, "binary": binary, "sign": sign, "dza": dza, "azz":azz} tmp = Processor.EMFourierFilter(img, params) tmp.set_attr_dict({"ctf": ctf}) tmp.write_image(outstack, i) elif options.changesize: nargs = len(args) if nargs != 2: ERROR("must provide name of input and output file!", "change size", 1) return from utilities import get_im instack = args[0] outstack = args[1] sub_rate = float(options.ratio) nima = EMUtil.get_image_count(instack) from fundamentals import resample for i in xrange(nima): resample(get_im(instack, i), sub_rate).write_image(outstack, i) elif options.isacgroup>-1: nargs = len(args) if nargs != 3: ERROR("Three files needed on input!", "isacgroup", 1) return from utilities import get_im instack = args[0] m=get_im(args[1],int(options.isacgroup)).get_attr("members") l = [] for k in m: l.append(int(get_im(args[0],k).get_attr(options.params))) from utilities import write_text_file write_text_file(l, args[2]) elif options.isacselect: nargs = len(args) if nargs != 2: ERROR("Two files needed on input!", "isacgroup", 1) return from utilities import get_im nima = EMUtil.get_image_count(args[0]) m = [] for k in xrange(nima): m += get_im(args[0],k).get_attr("members") m.sort() from utilities import write_text_file write_text_file(m, args[1]) elif options.pw: nargs = len(args) if nargs < 2: ERROR("must provide name of input and output file!", "pw", 1) return from utilities import get_im, write_text_file from fundamentals import rops_table d = 
get_im(args[0]) ndim = d.get_ndim() if ndim ==3: pw = rops_table(d) write_text_file(pw, args[1]) else: nx = d.get_xsize() ny = d.get_ysize() if nargs ==3: mask = get_im(args[2]) wn = int(options.wn) if wn == -1: wn = max(nx, ny) else: if( (wn<nx) or (wn<ny) ): ERROR("window size cannot be smaller than the image size","pw",1) n = EMUtil.get_image_count(args[0]) from utilities import model_blank, model_circle, pad from EMAN2 import periodogram p = model_blank(wn,wn) for i in xrange(n): d = get_im(args[0], i) if nargs==3: d *=mask st = Util.infomask(d, None, True) d -= st[0] p += periodogram(pad(d, wn, wn, 1, 0.)) p /= n p.write_image(args[1]) elif options.adjpw: if len(args) < 3: ERROR("filt_by_rops input target output fl aa (the last two are optional parameters of a low-pass filter)","adjpw",1) return img_stack = args[0] from math import sqrt from fundamentals import rops_table, fft from utilities import read_text_file, get_im from filter import filt_tanl, filt_table if( args[1][-3:] == 'txt'): rops_dst = read_text_file( args[1] ) else: rops_dst = rops_table(get_im( args[1] )) out_stack = args[2] if(len(args) >4): fl = float(args[3]) aa = float(args[4]) else: fl = -1.0 aa = 0.0 nimage = EMUtil.get_image_count( img_stack ) for i in xrange(nimage): img = fft(get_im(img_stack, i) ) rops_src = rops_table(img) assert len(rops_dst) == len(rops_src) table = [0.0]*len(rops_dst) for j in xrange( len(rops_dst) ): table[j] = sqrt( rops_dst[j]/rops_src[j] ) if( fl > 0.0): img = filt_tanl(img, fl, aa) img = fft(filt_table(img, table)) img.write_image(out_stack, i) elif options.rotpw != None: if len(args) != 1: ERROR("Only one input permitted","rotpw",1) return from utilities import write_text_file, get_im from fundamentals import rops_table from math import log10 t = rops_table(get_im(args[0])) x = range(len(t)) r = [0.0]*len(x) for i in x: r[i] = log10(t[i]) write_text_file([t,r,x],options.rotpw) elif options.transformparams != None: if len(args) != 2: ERROR("Please provide 
names of input and output files with orientation parameters","transformparams",1) return from utilities import read_text_row, write_text_row transf = [0.0]*6 spl=options.transformparams.split(',') for i in xrange(len(spl)): transf[i] = float(spl[i]) write_text_row( rotate_shift_params(read_text_row(args[0]), transf) , args[1]) elif options.makedb != None: nargs = len(args) if nargs != 1: print "must provide exactly one argument denoting database key under which the input params will be stored" return dbkey = args[0] print "database key under which params will be stored: ", dbkey gbdb = js_open_dict("e2boxercache/gauss_box_DB.json") parmstr = 'dummy:'+options.makedb[0] (processorname, param_dict) = parsemodopt(parmstr) dbdict = {} for pkey in param_dict: if (pkey == 'invert_contrast') or (pkey == 'use_variance'): if param_dict[pkey] == 'True': dbdict[pkey] = True else: dbdict[pkey] = False else: dbdict[pkey] = param_dict[pkey] gbdb[dbkey] = dbdict elif options.generate_projections: nargs = len(args) if nargs != 3: ERROR("Must provide name of input structure(s) from which to generate projections, name of output projection stack, and prefix for output micrographs."\ "sxprocess - generate projections",1) return inpstr = args[0] outstk = args[1] micpref = args[2] parmstr = 'dummy:'+options.generate_projections[0] (processorname, param_dict) = parsemodopt(parmstr) parm_CTF = False parm_format = 'bdb' parm_apix = 2.5 if 'CTF' in param_dict: if param_dict['CTF'] == 'True': parm_CTF = True if 'format' in param_dict: parm_format = param_dict['format'] if 'apix' in param_dict: parm_apix = float(param_dict['apix']) boxsize = 64 if 'boxsize' in param_dict: boxsize = int(param_dict['boxsize']) print "pixel size: ", parm_apix, " format: ", parm_format, " add CTF: ", parm_CTF, " box size: ", boxsize scale_mult = 2500 sigma_add = 1.5 sigma_proj = 30.0 sigma2_proj = 17.5 sigma_gauss = 0.3 sigma_mic = 30.0 sigma2_mic = 17.5 sigma_gauss_mic = 0.3 if 'scale_mult' in param_dict: 
scale_mult = float(param_dict['scale_mult']) if 'sigma_add' in param_dict: sigma_add = float(param_dict['sigma_add']) if 'sigma_proj' in param_dict: sigma_proj = float(param_dict['sigma_proj']) if 'sigma2_proj' in param_dict: sigma2_proj = float(param_dict['sigma2_proj']) if 'sigma_gauss' in param_dict: sigma_gauss = float(param_dict['sigma_gauss']) if 'sigma_mic' in param_dict: sigma_mic = float(param_dict['sigma_mic']) if 'sigma2_mic' in param_dict: sigma2_mic = float(param_dict['sigma2_mic']) if 'sigma_gauss_mic' in param_dict: sigma_gauss_mic = float(param_dict['sigma_gauss_mic']) from filter import filt_gaussl, filt_ctf from utilities import drop_spider_doc, even_angles, model_gauss, delete_bdb, model_blank,pad,model_gauss_noise,set_params2D, set_params_proj from projection import prep_vol,prgs seed(14567) delta = 29 angles = even_angles(delta, 0.0, 89.9, 0.0, 359.9, "S") nangle = len(angles) modelvol = [] nvlms = EMUtil.get_image_count(inpstr) from utilities import get_im for k in xrange(nvlms): modelvol.append(get_im(inpstr,k)) nx = modelvol[0].get_xsize() if nx != boxsize: ERROR("Requested box dimension does not match dimension of the input model.", \ "sxprocess - generate projections",1) nvol = 10 volfts = [[] for k in xrange(nvlms)] for k in xrange(nvlms): for i in xrange(nvol): sigma = sigma_add + random() # 1.5-2.5 addon = model_gauss(sigma, boxsize, boxsize, boxsize, sigma, sigma, 38, 38, 40 ) scale = scale_mult * (0.5+random()) vf, kb = prep_vol(modelvol[k] + scale*addon) volfts[k].append(vf) del vf, modelvol if parm_format == "bdb": stack_data = "bdb:"+outstk delete_bdb(stack_data) else: stack_data = outstk + ".hdf" Cs = 2.0 pixel = parm_apix voltage = 120.0 ampcont = 10.0 ibd = 4096/2-boxsize iprj = 0 width = 240 xstart = 8 + boxsize/2 ystart = 8 + boxsize/2 rowlen = 17 from random import randint params = [] for idef in xrange(3, 8): irow = 0 icol = 0 mic = model_blank(4096, 4096) defocus = idef * 0.5#0.2 if parm_CTF: astampl=defocus*0.15 
astangl=50.0 ctf = generate_ctf([defocus, Cs, voltage, pixel, ampcont, 0.0, astampl, astangl]) for i in xrange(nangle): for k in xrange(12): dphi = 8.0*(random()-0.5) dtht = 8.0*(random()-0.5) psi = 360.0*random() phi = angles[i][0]+dphi tht = angles[i][1]+dtht s2x = 4.0*(random()-0.5) s2y = 4.0*(random()-0.5) params.append([phi, tht, psi, s2x, s2y]) ivol = iprj % nvol #imgsrc = randint(0,nvlms-1) imgsrc = iprj % nvlms proj = prgs(volfts[imgsrc][ivol], kb, [phi, tht, psi, -s2x, -s2y]) x = xstart + irow * width y = ystart + icol * width mic += pad(proj, 4096, 4096, 1, 0.0, x-2048, y-2048, 0) proj = proj + model_gauss_noise( sigma_proj, nx, nx ) if parm_CTF: proj = filt_ctf(proj, ctf) proj.set_attr_dict({"ctf":ctf, "ctf_applied":0}) proj = proj + filt_gaussl(model_gauss_noise(sigma2_proj, nx, nx), sigma_gauss) proj.set_attr("origimgsrc",imgsrc) proj.set_attr("test_id", iprj) # flags describing the status of the image (1 = true, 0 = false) set_params2D(proj, [0.0, 0.0, 0.0, 0, 1.0]) set_params_proj(proj, [phi, tht, psi, s2x, s2y]) proj.write_image(stack_data, iprj) icol += 1 if icol == rowlen: icol = 0 irow += 1 iprj += 1 mic += model_gauss_noise(sigma_mic,4096,4096) if parm_CTF: #apply CTF mic = filt_ctf(mic, ctf) mic += filt_gaussl(model_gauss_noise(sigma2_mic, 4096, 4096), sigma_gauss_mic) mic.write_image(micpref + "%1d.hdf" % (idef-3), 0) drop_spider_doc("params.txt", params) elif options.importctf != None: print ' IMPORTCTF ' from utilities import read_text_row,write_text_row from random import randint import subprocess grpfile = 'groupid%04d'%randint(1000,9999) ctfpfile = 'ctfpfile%04d'%randint(1000,9999) cterr = [options.defocuserror/100.0, options.astigmatismerror] ctfs = read_text_row(options.importctf) for kk in xrange(len(ctfs)): root,name = os.path.split(ctfs[kk][-1]) ctfs[kk][-1] = name[:-4] if(options.input[:4] != 'bdb:'): ERROR('Sorry, only bdb files implemented','importctf',1) d = options.input[4:] #try: str = d.index('*') #except: str = -1 from string 
import split import glob uu = os.path.split(d) uu = os.path.join(uu[0],'EMAN2DB',uu[1]+'.bdb') flist = glob.glob(uu) for i in xrange(len(flist)): root,name = os.path.split(flist[i]) root = root[:-7] name = name[:-4] fil = 'bdb:'+os.path.join(root,name) sourcemic = EMUtil.get_all_attributes(fil,'ptcl_source_image') nn = len(sourcemic) gctfp = [] groupid = [] for kk in xrange(nn): junk,name2 = os.path.split(sourcemic[kk]) name2 = name2[:-4] ctfp = [-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0] for ll in xrange(len(ctfs)): if(name2 == ctfs[ll][-1]): # found correct if(ctfs[ll][8]/ctfs[ll][0] <= cterr[0]): # acceptable defocus error ctfp = ctfs[ll][:8] if(ctfs[ll][10] > cterr[1] ): # error of astigmatism exceed the threshold, set astigmatism to zero. ctfp[6] = 0.0 ctfp[7] = 0.0 gctfp.append(ctfp) groupid.append(kk) break if(len(groupid) > 0): write_text_row(groupid, grpfile) write_text_row(gctfp, ctfpfile) cmd = "{} {} {} {}".format('e2bdb.py',fil,'--makevstack=bdb:'+root+'G'+name,'--list='+grpfile) #print cmd subprocess.call(cmd, shell=True) cmd = "{} {} {} {}".format('sxheader.py','bdb:'+root+'G'+name,'--params=ctf','--import='+ctfpfile) #print cmd subprocess.call(cmd, shell=True) else: print ' >>> Group ',name,' skipped.' cmd = "{} {} {}".format("rm -f",grpfile,ctfpfile) subprocess.call(cmd, shell=True) elif options.scale > 0.0: from utilities import read_text_row,write_text_row scale = options.scale nargs = len(args) if nargs != 2: print "Please provide names of input and output file!" 
return p = read_text_row(args[0]) for i in xrange(len(p)): p[i][3] /= scale p[i][4] /= scale write_text_row(p, args[1]) elif options.adaptive_mask: from utilities import get_im from morphology import adaptive_mask, binarize, erosion, dilation nsigma = options.nsigma ndilation = options.ndilation kernel_size = options.kernel_size gauss_standard_dev = options.gauss_standard_dev nargs = len(args) if nargs ==0: print " Create 3D mask from a given volume, either automatically or from the user provided threshold." elif nargs > 2: print "Too many inputs are given, try again!" return else: inputvol = get_im(args[0]) input_path, input_file_name = os.path.split(args[0]) input_file_name_root,ext=os.path.splitext(input_file_name) if nargs == 2: mask_file_name = args[1] else: mask_file_name = "adaptive_mask_for_"+input_file_name_root+".hdf" # Only hdf file is output. if options.threshold !=9999.: mask3d = binarize(inputvol, options.threshold) for i in xrange(options.ne): mask3d = erosion(mask3d) for i in xrange(options.nd): mask3d = dilation(mask3d) else: mask3d = adaptive_mask(inputvol, nsigma, ndilation, kernel_size, gauss_standard_dev) mask3d.write_image(mask_file_name) elif options.postprocess: from utilities import get_im from fundamentals import rot_avg_table from morphology import compute_bfactor,power from statistics import fsc from filter import filt_table, filt_gaussinv from EMAN2 import periodogram e1 = get_im(args[0],0) if e1.get_zsize()==1: nimage = EMUtil.get_image_count(args[0]) if options.mask !=None: m = get_im(options.mask) else: m = None for i in xrange(nimage): e1 = get_im(args[0],i) if m: e1 *=m guinerline = rot_avg_table(power(periodogram(e1),.5)) freq_max = 1/(2.*pixel_size) freq_min = 1./options.B_start b,junk=compute_bfactor(guinerline, freq_min, freq_max, pixel_size) tmp = b/pixel_size**2 sigma_of_inverse=sqrt(2./tmp) e1 = filt_gaussinv(e1,sigma_of_inverse) if options.low_pass_filter: from filter import filt_tanl e1 =filt_tanl(e1,options.ff, 
options.aa) e1.write_image(options.output) else: nargs = len(args) e1 = get_im(args[0]) if nargs >1: e2 = get_im(args[1]) if options.mask !=None: m = get_im(options.mask) else: m =None pixel_size = options.pixel_size from math import sqrt if m !=None: e1 *=m if nargs >1 :e2 *=m if options.fsc_weighted: frc = fsc(e1,e2,1) ## FSC is done on masked two images #### FSC weighting sqrt((2.*fsc)/(1+fsc)); fil = len(frc[1])*[None] for i in xrange(len(fil)): if frc[1][i]>=options.FSC_cutoff: tmp = frc[1][i] else: tmp = 0.0 fil[i] = sqrt(2.*tmp/(1.+tmp)) if nargs>1: e1 +=e2 if options.fsc_weighted: e1=filt_table(e1,fil) guinerline = rot_avg_table(power(periodogram(e1),.5)) freq_max = 1/(2.*pixel_size) freq_min = 1./options.B_start b,junk = compute_bfactor(guinerline, freq_min, freq_max, pixel_size) tmp = b/pixel_size**2 sigma_of_inverse=sqrt(2./tmp) e1 = filt_gaussinv(e1,sigma_of_inverse) if options.low_pass_filter: from filter import filt_tanl e1 =filt_tanl(e1,options.ff, options.aa) e1.write_image(options.output) elif options.window_stack: nargs = len(args) if nargs ==0: print " Reduce image size of a stack" return else: output_stack_name = None inputstack = args[0] if nargs ==2:output_stack_name = args[1] input_path,input_file_name=os.path.split(inputstack) input_file_name_root,ext=os.path.splitext(input_file_name) if input_file_name_root[0:3]=="bdb":stack_is_bdb= True else: stack_is_bdb= False if output_stack_name is None: if stack_is_bdb: output_stack_name ="bdb:reduced_"+input_file_name_root[4:] else:output_stack_name = "reduced_"+input_file_name_root+".hdf" # Only hdf file is output. nimage = EMUtil.get_image_count(inputstack) from fundamentals import window2d for i in xrange(nimage): image = EMData() image.read_image(inputstack,i) w = window2d(image,options.box,options.box) w.write_image(output_stack_name,i) else: ERROR("Please provide option name","sxprocess.py",1)
def main():
    """Recompute ISAC 2D class averages from the original stack under MPI.

    Command-line driver. It parses the options, initialises MPI and fills the
    global ``Blockdata`` / ``Tracker`` dictionaries. The main node creates the
    output directory, reads the ISAC alignment parameters and combines them
    with the initial 2D parameters; the results are broadcast to all ranks.
    Each rank then recomputes its share of the class averages (optionally with
    local refinement), applies the selected power-spectrum adjustment and
    low-pass filter, and sends the result to the main node, which writes
    ``class_averages.hdf``, the per-average parameter files and
    ``FH_list.txt``. Finally rank 0 runs ``sxchains.py`` to produce
    ``ordered_class_averages.hdf``.

    The function never returns: it finalises MPI and exits the interpreter.

    Relies on names provided by the enclosing module (MPI bindings, ``EMUtil``,
    ``global_def``, ``strftime``/``localtime``/``sleep``, ``read_text_row`` and
    the averaging helpers); they are not imported here.
    """
    global Tracker, Blockdata

    import os
    import sys
    from optparse import OptionParser
    from global_def import SPARXVERSION
    from global_def import ERROR
    from EMAN2 import EMData
    from logger import Logger, BaseLogger_Files

    progname = os.path.basename(sys.argv[0])
    usage = progname + " --output_dir=output_dir --isac_dir=output_dir_of_isac "
    parser = OptionParser(usage, version=SPARXVERSION)

    # Four choices for --pw_adjustment:
    #   1> analytical_model (default)
    #   2> no_adjustment
    #   3> bfactor
    #   4> anything else is taken as the path of a text file that contains a
    #      1D rotationally averaged power spectrum
    parser.add_option(
        "--pw_adjustment", type="string", default='analytical_model',
        help="adjust power spectrum of 2-D averages to an analytic model. Other opions: no_adjustment; bfactor; a text file of 1D rotationally averaged PW")
    # options in common
    parser.add_option(
        "--isac_dir", type="string", default='',
        help="ISAC run output directory, input directory for this command")
    parser.add_option(
        "--output_dir", type="string", default='',
        help="output directory where computed averages are saved")
    parser.add_option(
        "--pixel_size", type="float", default=-1.0,
        help="pixel_size of raw images. one can put 1.0 in case of negative stain data")
    parser.add_option(
        "--fl", type="float", default=-1.0,
        help="low pass filter, = -1.0, not applied; =0.0, using FH1 (initial resolution), = 1.0 using FH2 (resolution after local alignment), or user provided value in absolute freqency [0.0:0.5]")
    parser.add_option("--stack", type="string", default="",
                      help="data stack used in ISAC")
    parser.add_option("--radius", type="int", default=-1, help="radius")
    parser.add_option("--xr", type="float", default=-1.0,
                      help="local alignment search range")
    parser.add_option("--fh", type="float", default=-1.0,
                      help="local alignment high frequencies limit")
    parser.add_option("--navg", type="int", default=1000000,
                      help="number of aveages")
    parser.add_option("--local_alignment", action="store_true", default=False,
                      help="do local alignment")
    parser.add_option(
        "--noctf", action="store_true", default=False,
        help="no ctf correction, useful for negative stained data. always ctf for cryo data")
    parser.add_option(
        "--B_start", type="float", default=45.0,
        help="start frequency (Angstrom) of power spectrum for B_factor estimation")
    parser.add_option(
        "--Bfactor", type="float", default=-1.0,
        help="User defined bactors (e.g. 25.0[A^2]). By default, the program automatically estimates B-factor. ")
    (options, args) = parser.parse_args(sys.argv[1:])

    # Decode the power-spectrum adjustment mode.  "no_adjustment" needs no
    # flag of its own: it is simply the case where none of the others is set.
    pw_mode = options.pw_adjustment
    adjust_to_analytic_model = pw_mode == 'analytical_model'
    B_enhance = pw_mode == 'bfactor'
    adjust_to_given_pw2 = pw_mode not in ('analytical_model', 'no_adjustment', 'bfactor')

    from utilities import get_im, bcast_number_to_all, read_text_file, wrap_mpi_bcast, write_text_row
    from utilities import cmdexecute
    from filter import filt_tanl
    # Not referenced below; the import is retained in case the module has
    # import-time side effects.
    import user_functions

    # ------------------------------------------------------------------
    # MPI set-up
    # ------------------------------------------------------------------
    mpi_init(0, [])
    nproc = mpi_comm_size(MPI_COMM_WORLD)
    myid = mpi_comm_rank(MPI_COMM_WORLD)

    Blockdata = {}
    Blockdata["nproc"] = nproc
    Blockdata["myid"] = myid
    Blockdata["main_node"] = 0
    Blockdata["shared_comm"] = mpi_comm_split_type(
        MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL)
    Blockdata["myid_on_node"] = mpi_comm_rank(Blockdata["shared_comm"])
    Blockdata["no_of_processes_per_group"] = mpi_comm_size(Blockdata["shared_comm"])
    masters_from_groups_vs_everything_else_comm = mpi_comm_split(
        MPI_COMM_WORLD,
        Blockdata["main_node"] == Blockdata["myid_on_node"],
        Blockdata["myid_on_node"])
    Blockdata["color"], Blockdata["no_of_groups"], balanced_processor_load_on_nodes = get_colors_and_subsets(
        Blockdata["main_node"], MPI_COMM_WORLD, Blockdata["myid"],
        Blockdata["shared_comm"], Blockdata["myid_on_node"],
        masters_from_groups_vs_everything_else_comm)
    # The last three groups, and the first rank of each of them, are recorded
    # for volume processing.  Neither entry is read again in this function.
    Blockdata["node_volume"] = [
        Blockdata["no_of_groups"] - 3,
        Blockdata["no_of_groups"] - 2,
        Blockdata["no_of_groups"] - 1,
    ]
    Blockdata["nodes"] = [
        Blockdata["node_volume"][0] * Blockdata["no_of_processes_per_group"],
        Blockdata["node_volume"][1] * Blockdata["no_of_processes_per_group"],
        Blockdata["node_volume"][2] * Blockdata["no_of_processes_per_group"],
    ]
    # End of Blockdata
    global_def.BATCH = True
    global_def.MPI = True

    if adjust_to_given_pw2:
        # Only the main node touches the file system; the verdict is shared
        # so that every rank reports the error.
        checking_flag = 0
        if Blockdata["myid"] == Blockdata["main_node"]:
            if not os.path.exists(options.pw_adjustment):
                checking_flag = 1
        checking_flag = bcast_number_to_all(
            checking_flag, Blockdata["main_node"], MPI_COMM_WORLD)
        if checking_flag == 1:
            ERROR("User provided power spectrum does not exist",
                  "sxcompute_isac_avg.py", 1, Blockdata["myid"])

    # ------------------------------------------------------------------
    # Tracker: run-wide state built from the command-line options
    # ------------------------------------------------------------------
    Tracker = {}
    Constants = {}
    Constants["isac_dir"] = options.isac_dir
    Constants["masterdir"] = options.output_dir
    Constants["pixel_size"] = options.pixel_size
    Constants["orgstack"] = options.stack
    Constants["radius"] = options.radius
    Constants["xrange"] = options.xr
    Constants["FH"] = options.fh
    Constants["low_pass_filter"] = options.fl
    Constants["navg"] = options.navg
    Constants["B_start"] = options.B_start
    Constants["Bfactor"] = options.Bfactor
    if adjust_to_given_pw2:
        Constants["modelpw"] = options.pw_adjustment
    Tracker["constants"] = Constants

    # ------------------------------------------------------------------
    # Create the master directory and its subdirectories (main node only)
    # ------------------------------------------------------------------
    line = strftime("%Y-%m-%d_%H:%M:%S", localtime()) + " =>"
    if Tracker["constants"]["masterdir"] == Tracker["constants"]["isac_dir"]:
        masterdir = os.path.join(Tracker["constants"]["isac_dir"], "sharpen")
    else:
        masterdir = Tracker["constants"]["masterdir"]

    if Blockdata["myid"] == Blockdata["main_node"]:
        print(line, "Postprocessing ISAC 2D averages starts")
        if not masterdir:
            # No output directory given: create "sharpen_<name>" next to the
            # ISAC directory.  The prefix is applied to the last path
            # component only; prefixing the whole path would yield an invalid
            # location such as "sharpen_/a/b" for a multi-component isac_dir.
            isac_parent, isac_name = os.path.split(
                os.path.normpath(Tracker["constants"]["isac_dir"]))
            masterdir = os.path.join(isac_parent, "sharpen_" + isac_name)
            os.mkdir(masterdir)
        else:
            if os.path.exists(masterdir):
                print("%s already exists" % masterdir)
            else:
                os.mkdir(masterdir)
        for subdir_name in ("ali2d_local_params_avg", "params_avg"):
            subdir_path = os.path.join(masterdir, subdir_name)
            if not os.path.exists(subdir_path):
                os.mkdir(subdir_path)
        li = len(masterdir)
    else:
        li = 0
    # Share the final directory name (length first, then the characters).
    li = mpi_bcast(li, 1, MPI_INT, Blockdata["main_node"], MPI_COMM_WORLD)[0]
    masterdir = mpi_bcast(masterdir, li, MPI_CHAR, Blockdata["main_node"], MPI_COMM_WORLD)
    masterdir = "".join(masterdir)
    Tracker["constants"]["masterdir"] = masterdir
    log_main = Logger(BaseLogger_Files())
    log_main.prefix = Tracker["constants"]["masterdir"] + "/"

    # Wait until the directory created by the main node is visible here.
    while not os.path.exists(Tracker["constants"]["masterdir"]):
        print("Node ", Blockdata["myid"], " waiting...", Tracker["constants"]["masterdir"])
        sleep(1)
    mpi_barrier(MPI_COMM_WORLD)

    # ------------------------------------------------------------------
    # Initial 2D alignment parameters, keyed by image index
    # ------------------------------------------------------------------
    if Blockdata["myid"] == Blockdata["main_node"]:
        print(Tracker["constants"]["isac_dir"])
        Tracker["directory"] = os.path.join(Tracker["constants"]["isac_dir"], "2dalignment")
        core = read_text_row(os.path.join(Tracker["directory"], "initial2Dparams.txt"))
        init_dict = dict(enumerate(core))
        del core
    else:
        init_dict = 0
    init_dict = wrap_mpi_bcast(init_dict, Blockdata["main_node"], communicator=MPI_COMM_WORLD)

    do_ctf = not options.noctf

    if Blockdata["myid"] == Blockdata["main_node"]:
        if do_ctf:
            print("CTF correction is on")
        else:
            print("CTF correction is off")
        if options.local_alignment:
            print("local refinement is on")
        else:
            print("local refinement is off")
        if B_enhance:
            print("Bfactor is to be applied on averages")
        elif adjust_to_given_pw2:
            print("PW of averages is adjusted to a given 1D PW curve")
        elif adjust_to_analytic_model:
            print("PW of averages is adjusted to analytical model")
        else:
            print("PW of averages is not adjusted")

        image = get_im(Tracker["constants"]["orgstack"], 0)
        Tracker["constants"]["nnxo"] = image.get_xsize()
        if Tracker["constants"]["pixel_size"] == -1.0:
            print(
                "Pixel size value is not provided by user. extracting it from ctf header entry of the original stack."
            )
            try:
                ctf_params = image.get_attr("ctf")
                Tracker["constants"]["pixel_size"] = ctf_params.apix
            except Exception:
                # Deliberately not a bare "except": SystemExit and
                # KeyboardInterrupt must not be reported as a missing header.
                ERROR(
                    "Pixel size could not be extracted from the original stack.",
                    "sxcompute_isac_avg.py", 1,
                    Blockdata["myid"])  # action=1 - fatal error, exit

        # Shrink ratio and particle radius recorded by the ISAC run.
        isac_shrink_path = os.path.join(Tracker["constants"]["isac_dir"],
                                        "README_shrink_ratio.txt")
        if not os.path.exists(isac_shrink_path):
            ERROR(
                "%s does not exist in the specified ISAC run output directory"
                % (isac_shrink_path), "sxcompute_isac_avg.py", 1,
                Blockdata["myid"])  # action=1 - fatal error, exit
        with open(isac_shrink_path, "r") as isac_shrink_file:
            isac_shrink_lines = isac_shrink_file.readlines()
        try:
            # 6th line: shrink ratio (= [target particle radius]/[particle radius]) used in the ISAC run
            isac_shrink_ratio = float(isac_shrink_lines[5])
            # 7th line: particle radius at original pixel size used in the ISAC run
            isac_radius = float(isac_shrink_lines[6])
        except (IndexError, ValueError):
            ERROR(
                "%s does not contain valid shrink ratio and particle radius entries"
                % (isac_shrink_path), "sxcompute_isac_avg.py", 1,
                Blockdata["myid"])  # action=1 - fatal error, exit
        if isac_shrink_ratio <= 0.0:
            # The ratio is used as a divisor below (search range and shifts).
            ERROR(
                "Invalid ISAC shrink ratio found in %s" % (isac_shrink_path),
                "sxcompute_isac_avg.py", 1,
                Blockdata["myid"])  # action=1 - fatal error, exit
        print("Extracted parameter values")
        print("ISAC shrink ratio : {0}".format(isac_shrink_ratio))
        print("ISAC particle radius : {0}".format(isac_radius))
        Tracker["ini_shrink"] = isac_shrink_ratio
    else:
        # Placeholder; replaced by the main node's Tracker in the broadcast.
        Tracker["ini_shrink"] = 0.0
    Tracker = wrap_mpi_bcast(Tracker, Blockdata["main_node"], communicator=MPI_COMM_WORLD)

    # Tracker was re-bound by the broadcast, so the alias is taken only now.
    constants = Tracker["constants"]
    masterdir = constants["masterdir"]
    class_averages_in = os.path.join(constants["isac_dir"], "class_averages.hdf")
    class_averages_out = os.path.join(masterdir, "class_averages.hdf")

    # Search range: at least one pixel of the shrunken image, rounded up.
    x_range = max(constants["xrange"], int(1. / Tracker["ini_shrink"] + 0.99999))
    a_range = y_range = x_range

    if Blockdata["myid"] == Blockdata["main_node"]:
        parameters = read_text_row(os.path.join(constants["isac_dir"], "all_parameters.txt"))
    else:
        parameters = 0
    parameters = wrap_mpi_bcast(parameters, Blockdata["main_node"], communicator=MPI_COMM_WORLD)

    # ------------------------------------------------------------------
    # Per-average member lists and combined alignment parameters
    # ------------------------------------------------------------------
    params_dict = {}
    list_dict = {}
    navg = min(constants["navg"], EMUtil.get_image_count(class_averages_in))
    global_dict = {}   # particle id -> [average index, position within average]
    ptl_list = []      # all particle ids that belong to some average
    memlist = []
    if Blockdata["myid"] == Blockdata["main_node"]:
        print("Number of averages computed in this run is %d" % navg)
        for iavg in range(navg):
            params_of_this_average = []
            image = get_im(class_averages_in, iavg)
            members = sorted(image.get_attr("members"))
            memlist.append(members)
            for im, abs_id in enumerate(members):
                global_dict[abs_id] = [iavg, im]
                # The ISAC shifts are divided by the shrink ratio before
                # being combined with the initial parameters.
                P = combine_params2(
                    init_dict[abs_id][0], init_dict[abs_id][1],
                    init_dict[abs_id][2], init_dict[abs_id][3],
                    parameters[abs_id][0],
                    parameters[abs_id][1] / Tracker["ini_shrink"],
                    parameters[abs_id][2] / Tracker["ini_shrink"],
                    parameters[abs_id][3])
                if parameters[abs_id][3] == -1:
                    print(
                        "WARNING: Image #{0} is an unaccounted particle with invalid 2D alignment parameters and should not be the member of any classes. Please check the consitency of input dataset."
                        .format(abs_id)
                    )  # How to check what is wrong about mirror = -1 (Toshio 2018/01/11)
                params_of_this_average.append([P[0], P[1], P[2], P[3], 1.0])
                ptl_list.append(abs_id)
            params_dict[iavg] = params_of_this_average
            list_dict[iavg] = members
            write_text_row(
                params_of_this_average,
                os.path.join(masterdir, "params_avg", "params_avg_%03d.txt" % iavg))
        ptl_list.sort()
        init_params = [
            [ptl_id] + params_dict[global_dict[ptl_id][0]][global_dict[ptl_id][1]]
            for ptl_id in ptl_list
        ]
        write_text_row(init_params, os.path.join(masterdir, "init_isac_params.txt"))
    else:
        params_dict = 0
        list_dict = 0
        memlist = 0
    params_dict = wrap_mpi_bcast(params_dict, Blockdata["main_node"], communicator=MPI_COMM_WORLD)
    list_dict = wrap_mpi_bcast(list_dict, Blockdata["main_node"], communicator=MPI_COMM_WORLD)
    memlist = wrap_mpi_bcast(memlist, Blockdata["main_node"], communicator=MPI_COMM_WORLD)

    # ------------------------------------------------------------------
    # Now computing!
    # ------------------------------------------------------------------
    del init_dict
    tag_sharpen_avg = 1000
    # Always apply low pass filter to B_enhanced images to suppress noise in
    # high frequencies, even when the user did not request a filter.
    enforced_to_H1 = B_enhance and constants["low_pass_filter"] == -1.0

    if navg < Blockdata["nproc"]:
        # Every rank needs at least one average to work on.
        ERROR("number of nproc is larger than number of averages",
              "sxcompute_isac_avg.py", 1, Blockdata["myid"])
    else:
        i_am_main = Blockdata["myid"] == Blockdata["main_node"]

        FH_list = [[0, 0.0, 0.0] for im in range(navg)]
        image_start, image_end = MPI_start_end(navg, Blockdata["nproc"], Blockdata["myid"])

        # cpu_dict maps average index -> rank that computes it.
        if i_am_main:
            cpu_dict = {}
            for iproc in range(Blockdata["nproc"]):
                local_image_start, local_image_end = MPI_start_end(
                    navg, Blockdata["nproc"], iproc)
                for im in range(local_image_start, local_image_end):
                    cpu_dict[im] = iproc
        else:
            cpu_dict = 0
        cpu_dict = wrap_mpi_bcast(cpu_dict, Blockdata["main_node"], communicator=MPI_COMM_WORLD)

        slist = [None for im in range(navg)]
        plist_dict = {}

        if adjust_to_given_pw2:
            # The reference power spectrum does not depend on the average,
            # so it is read once instead of once per average.
            roo = read_text_file(constants["modelpw"], -1)
            roo = roo[0]  # always on the first column

        if i_am_main:
            if B_enhance:
                print("Avg ID B-factor FH1(Res before ali) FH2(Res after ali)")
            else:
                print("Avg ID FH1(Res before ali) FH2(Res after ali)")

        for iavg in range(image_start, image_end):
            mlist = EMData.read_images(constants["orgstack"], list_dict[iavg])
            for im, member_image in enumerate(mlist):
                set_params2D(member_image, params_dict[iavg][im], xform="xform.align2d")

            if options.local_alignment:
                new_avg, plist, FH2 = refinement_2d_local(
                    mlist, constants["radius"], a_range, x_range, y_range,
                    CTF=do_ctf, SNR=1.0e10)
                plist_dict[iavg] = plist
                FH1 = -1.0
            else:
                new_avg, frc, plist = compute_average(mlist, constants["radius"], do_ctf)
                FH1 = get_optimistic_res(frc)
                FH2 = -1.0
            FH_list[iavg] = [iavg, FH1, FH2]

            # Power-spectrum adjustment ("no_adjustment" leaves new_avg as is).
            if B_enhance:
                new_avg, gb = apply_enhancement(
                    new_avg, constants["B_start"], constants["pixel_size"],
                    constants["Bfactor"])
                print(" %6d %6.3f %4.3f %4.3f" % (iavg, gb, FH1, FH2))
            elif adjust_to_given_pw2:
                new_avg = adjust_pw_to_model(new_avg, constants["pixel_size"], roo)
                print(" %6d %4.3f %4.3f " % (iavg, FH1, FH2))
            elif adjust_to_analytic_model:
                new_avg = adjust_pw_to_model(new_avg, constants["pixel_size"], None)
                print(" %6d %4.3f %4.3f " % (iavg, FH1, FH2))

            # Low-pass filter selection.
            # NOTE(review): with --local_alignment FH1 is the -1.0
            # placeholder, yet --fl=0.0 (and the enforced B-factor filter)
            # still pass FH1 to filt_tanl -- confirm this is intended.
            if constants["low_pass_filter"] != -1.0:
                if constants["low_pass_filter"] == 0.0:
                    low_pass_filter = FH1
                elif constants["low_pass_filter"] == 1.0:
                    low_pass_filter = FH2
                    if not options.local_alignment:
                        low_pass_filter = FH1
                else:
                    low_pass_filter = constants["low_pass_filter"]
                    if low_pass_filter >= 0.45:
                        low_pass_filter = 0.45
                new_avg = filt_tanl(new_avg, low_pass_filter, 0.02)
            else:
                # No low pass filter but if enforced
                if enforced_to_H1:
                    new_avg = filt_tanl(new_avg, FH1, 0.02)
            if B_enhance:
                new_avg = fft(new_avg)

            new_avg.set_attr("members", list_dict[iavg])
            new_avg.set_attr("n_objects", len(list_dict[iavg]))
            slist[iavg] = new_avg
            print(strftime("%Y-%m-%d_%H:%M:%S", localtime()) + " =>",
                  "Refined average %7d" % iavg)

        # Send to main node to write
        mpi_barrier(MPI_COMM_WORLD)

        for im in range(navg):
            owner = cpu_dict[im]
            i_own = owner == Blockdata["myid"]
            owner_is_main = owner == Blockdata["main_node"]

            # The average itself
            if i_own and not i_am_main:
                send_EMData(slist[im], Blockdata["main_node"], tag_sharpen_avg)
            elif i_own and i_am_main:
                slist[im].set_attr("members", memlist[im])
                slist[im].set_attr("n_objects", len(memlist[im]))
                slist[im].write_image(class_averages_out, im)
            elif i_am_main:
                new_avg_other_cpu = recv_EMData(owner, tag_sharpen_avg)
                new_avg_other_cpu.set_attr("members", memlist[im])
                new_avg_other_cpu.set_attr("n_objects", len(memlist[im]))
                new_avg_other_cpu.write_image(class_averages_out, im)

            # Refined parameters (local alignment only) and resolutions
            if options.local_alignment:
                if i_own:
                    write_text_row(
                        plist_dict[im],
                        os.path.join(masterdir, "ali2d_local_params_avg",
                                     "ali2d_local_params_avg_%03d.txt" % im))
                if i_own and not owner_is_main:
                    wrap_mpi_send(plist_dict[im], Blockdata["main_node"], MPI_COMM_WORLD)
                    wrap_mpi_send(FH_list, Blockdata["main_node"], MPI_COMM_WORLD)
                elif i_am_main and not owner_is_main:
                    plist_dict[im] = wrap_mpi_recv(owner, MPI_COMM_WORLD)
                    # The sender ships its whole FH_list; only entry im is kept.
                    dummy = wrap_mpi_recv(owner, MPI_COMM_WORLD)
                    FH_list[im] = dummy[im]
            else:
                if i_own and not owner_is_main:
                    wrap_mpi_send(FH_list, Blockdata["main_node"], MPI_COMM_WORLD)
                elif i_am_main and not owner_is_main:
                    dummy = wrap_mpi_recv(owner, MPI_COMM_WORLD)
                    FH_list[im] = dummy[im]

            mpi_barrier(MPI_COMM_WORLD)
        mpi_barrier(MPI_COMM_WORLD)

        if i_am_main:
            if options.local_alignment:
                ali2d_local_params = [
                    [ptl_id] + plist_dict[global_dict[ptl_id][0]][global_dict[ptl_id][1]]
                    for ptl_id in ptl_list
                ]
                write_text_row(ali2d_local_params,
                               os.path.join(masterdir, "ali2d_local_params.txt"))
            write_text_row(FH_list, os.path.join(masterdir, "FH_list.txt"))

    mpi_barrier(MPI_COMM_WORLD)

    # ------------------------------------------------------------------
    # Order the new averages with sxchains.py (rank 0 only)
    # ------------------------------------------------------------------
    target_xr = 3
    target_yr = 3
    if Blockdata["myid"] == 0:
        junk_path = os.path.join(Tracker["constants"]["masterdir"], "junk.hdf")
        cmd = "{} {} {} {} {} {} {} {} {} {}".format(
            "sxchains.py",
            os.path.join(Tracker["constants"]["masterdir"], "class_averages.hdf"),
            junk_path,
            os.path.join(Tracker["constants"]["masterdir"], "ordered_class_averages.hdf"),
            "--circular",
            "--radius=%d" % Tracker["constants"]["radius"],
            "--xr=%d" % (target_xr + 1),
            "--yr=%d" % (target_yr + 1),
            "--align",
            ">/dev/null")
        junk = cmdexecute(cmd)
        cmd = "{} {}".format("rm -rf", junk_path)
        junk = cmdexecute(cmd)

    from mpi import mpi_finalize
    mpi_finalize()
    sys.exit()
# Testing environment
# Ad-hoc check executed when this part of the file runs: build an RNA
# sequence object from the contents of 'test_file.txt' and call its
# inverted_tandem_repeat method with the argument 9.
from bio_seq import BioSeq
from utilities import read_text_file

rna_seq = BioSeq(
    seq_type='RNA',
    seq=read_text_file('test_file.txt'),
)
rna_seq.inverted_tandem_repeat(9)
def organize_micrographs(args): import glob import shutil from utilities import read_text_file # To make the execution exit upon fatal error by ERROR in global_def.py global_def.BATCH = True # ------------------------------------------------------------------------------------ # Prepare the variables for all sections # ------------------------------------------------------------------------------------ # Use short names for arguments and options src_mic_pattern = args.source_micrograph_pattern select_list_path = args.selection_list dst_dir = args.destination_directory # ------------------------------------------------------------------------------------ # Check error conditions # ------------------------------------------------------------------------------------ subcommand_name = "organize_micrographs" if src_mic_pattern.find("*") == -1: ERROR( "The source micrograph path pattern must contain wild card (*). Please correct source_micrograph_pattern argument and restart the program.", subcommand_name) # action=1 - fatal error, exit if os.path.splitext(select_list_path)[1] != ".txt": ERROR( "The extension of source micrograph selecting list file must \'.txt\'. Please choose a correct file path or change the file extension, then restart the program.", subcommand_name) # action=1 - fatal error, exit if not os.path.exists(select_list_path): ERROR( "The micrograph selecting list file does not exist. 
Please choose a correct file path and restart the program.", subcommand_name) # action=1 - fatal error, exit assert (os.path.exists(select_list_path)) # ------------------------------------------------------------------------------------ # Define operation mode information # ------------------------------------------------------------------------------------ # Micrograph basename pattern (directory path is removed from micrograph path pattern) mic_basename_pattern = os.path.basename(src_mic_pattern) src_dir = os.path.dirname(src_mic_pattern) record_dir = dst_dir # always use the original output directory for recording generated information # Swap input directory and output directory if necessary if not args.reverse: print(" ") print_progress("Running with Normal Operation Mode... ") else: assert (args.reverse) print(" ") print_progress("Running with Reverse Operation Mode... ") dst_dir = src_dir src_dir = record_dir src_mic_pattern = os.path.join(src_dir, mic_basename_pattern) print_progress("Source micrograph basename pattern : %s" % (src_mic_pattern)) print_progress("Source directory : %s" % (src_dir)) print_progress("Destination directory : %s" % (dst_dir)) print_progress("Recording directory : %s" % (record_dir)) print(" ") # -------------------------------------------------------------------------------- # Prepare variables # -------------------------------------------------------------------------------- # Define indices of selection list parameters i_enum = -1 i_enum += 1 idx_mic_list_mic_path = i_enum # The name or path of micrographs i_enum += 1 n_idx_mic_list = i_enum # Global entry dictionary (all possible entries from all lists) for all mic id substring global_entry_dict = {} # mic id substring is the key subkey_src_mic_path = "Source Micrograph Path" subkey_dst_mic_path = "Destination Micrograph Path" subkey_select_mic_basename = "Selected Micrograph Basename" # List keeps only id substrings of micrographs whose all necessary information are available 
valid_mic_id_substr_list = [] # Prefix and suffix of micrograph basename pattern # to find the head/tail indices of micrograph id substring mic_basename_tokens = mic_basename_pattern.split("*") assert (len(mic_basename_tokens) == 2) # Find head index of micrograph id substring mic_id_substr_head_idx = len(mic_basename_tokens[0]) # Set up output directory dst_mic_pattern = None if os.path.exists(dst_dir): print(" ") print_progress("The destination directory (%s) already exists. " % (dst_dir)) dst_mic_pattern = os.path.join(dst_dir, mic_basename_pattern) # -------------------------------------------------------------------------------- # Register micrograph id substrings found in source directory (specified by source micrograph path pattern) # and associated source micrograph path to the global entry dictionary # -------------------------------------------------------------------------------- # Generate the list of micrograph paths in the source directory print(" ") print_progress("Checking the source directory...") src_mic_path_list = glob.glob(src_mic_pattern) # Check error condition of source micrograph file path list print_progress("Found %d microgarphs in %s." % (len(src_mic_path_list), src_dir)) if len(src_mic_path_list) == 0: ERROR( "No micrograph files are found in the directory specified by the micrograph path pattern (%s). Please check source_micrograph_pattern argument and restart the program." 
% (src_dir), subcommand_name) # action=1 - fatal error, exit assert (len(src_mic_path_list) > 0) # Register micrograph id substrings to the global entry dictionary for src_mic_path in src_mic_path_list: # Find tail index of micrograph id substring and extract the substring from the micrograph name src_mic_basename = os.path.basename(src_mic_path) mic_id_substr_tail_idx = src_mic_basename.index(mic_basename_tokens[1]) mic_id_substr = src_mic_basename[ mic_id_substr_head_idx:mic_id_substr_tail_idx] assert (src_mic_path == src_mic_pattern.replace("*", mic_id_substr)) if not mic_id_substr in global_entry_dict: # print("MRK_DEBUG: Added new mic_id_substr (%s) to global_entry_dict from src_mic_path_list "%(mic_id_substr)) global_entry_dict[mic_id_substr] = {} assert (mic_id_substr in global_entry_dict) global_entry_dict[mic_id_substr][subkey_src_mic_path] = src_mic_path assert (len(global_entry_dict) > 0) # Clean up variables which won't be used anymore del src_mic_path_list # -------------------------------------------------------------------------------- # Register micrograph id substrings found in destination directory if any # and associated source micrograph path to the global entry dictionary # -------------------------------------------------------------------------------- if dst_mic_pattern is not None: assert (os.path.exists(dst_dir)) dst_mic_pattern = os.path.join(dst_dir, mic_basename_pattern) # Generate the list of micrograph paths in the output directory print(" ") print_progress("Checking the destination directory...") dst_mic_path_list = glob.glob(dst_mic_pattern) # Check error condition of destination micrograph file path list print_progress("Found %d microgarphs in %s." 
% (len(dst_mic_path_list), dst_dir)) # Register micrograph id substrings to the global entry dictionary for dst_mic_path in dst_mic_path_list: # Find tail index of micrograph id substring and extract the substring from the micrograph name dst_mic_basename = os.path.basename(dst_mic_path) mic_id_substr_tail_idx = dst_mic_basename.index( mic_basename_tokens[1]) mic_id_substr = dst_mic_basename[ mic_id_substr_head_idx:mic_id_substr_tail_idx] assert (dst_mic_path == dst_mic_pattern.replace( "*", mic_id_substr)) if not mic_id_substr in global_entry_dict: # print("MRK_DEBUG: Added new mic_id_substr (%s) to global_entry_dict from dst_mic_path_list "%(mic_id_substr)) global_entry_dict[mic_id_substr] = {} assert (mic_id_substr in global_entry_dict) global_entry_dict[mic_id_substr][ subkey_dst_mic_path] = dst_mic_path assert (len(global_entry_dict) > 0) # Clean up variables which won't be used anymore del dst_mic_path_list # -------------------------------------------------------------------------------- # Register micrograph id substrings found in the selection list # and associated micrograph basename to the global entry dictionary # -------------------------------------------------------------------------------- # Generate the list of select micrograph paths in the selection file select_mic_path_list = [] # Generate micrograph lists according to the execution mode print(" ") print_progress("Checking the selection list...") select_mic_path_list = read_text_file(select_list_path) # Check error condition of micrograph entry lists print_progress("Found %d microgarph entries in %s." % (len(select_mic_path_list), select_list_path)) if len(select_mic_path_list) == 0: ERROR( "No micrograph entries are found in the selection list file (%s). Please correct selection_list option and restart the program." 
% (select_list_path), subcommand_name) # action=1 - fatal error, exit assert (len(select_mic_path_list) > 0) select_mic_dir = os.path.dirname(select_mic_path_list[0]) if select_mic_dir != "": print_progress( " NOTE: Program disregards the directory paths in the source selection list (%s)." % (select_mic_dir)) # Register micrograph id substrings to the global entry dictionary for select_mic_path in select_mic_path_list: # Find tail index of micrograph id substring and extract the substring from the micrograph name select_mic_basename = os.path.basename(select_mic_path) mic_id_substr_tail_idx = select_mic_basename.index( mic_basename_tokens[1]) mic_id_substr = select_mic_basename[ mic_id_substr_head_idx:mic_id_substr_tail_idx] assert (select_mic_basename == mic_basename_pattern.replace( "*", mic_id_substr)) if not mic_id_substr in global_entry_dict: # print("MRK_DEBUG: Added new mic_id_substr (%s) to global_entry_dict from select_mic_path_list "%(mic_id_substr)) global_entry_dict[mic_id_substr] = {} assert (mic_id_substr in global_entry_dict) global_entry_dict[mic_id_substr][ subkey_select_mic_basename] = select_mic_basename assert (len(global_entry_dict) > 0) # Clean up variables which won't be used anymore del select_mic_path_list # -------------------------------------------------------------------------------- # Clean up variables related to registration to the global entry dictionary # -------------------------------------------------------------------------------- del mic_basename_tokens del mic_id_substr_head_idx # -------------------------------------------------------------------------------- # Create the list containing only valid micrograph id substrings # -------------------------------------------------------------------------------- print(" ") print_progress("Checking consistency of the provided dataset ...") if dst_mic_pattern is None: assert (not os.path.exists(dst_dir)) # Prepare lists to keep track of invalid (rejected) micrographs 
no_src_mic_id_substr_list = [] # Loop over substring id list for mic_id_substr in global_entry_dict: mic_id_entry = global_entry_dict[mic_id_substr] warinnig_messages = [] # selected micrograph basename must have been registed always . if subkey_select_mic_basename in mic_id_entry: # Check if associated input micrograph exists if not subkey_src_mic_path in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) warinnig_messages.append( " associated micrograph (%s) does not exist in the source directory (%s)." % (mic_basename, src_dir)) no_src_mic_id_substr_list.append(mic_id_substr) if len(warinnig_messages) > 0: print_progress( "WARNING!!! Micrograph ID %s has problems with consistency among the provided dataset:" % (mic_id_substr)) for warinnig_message in warinnig_messages: print_progress(warinnig_message) print_progress(" Ignores this as an invalid entry.") else: # print("MRK_DEBUG: adding mic_id_substr := ", mic_id_substr) valid_mic_id_substr_list.append(mic_id_substr) # else: # assert (not subkey_select_mic_basename in mic_id_entry) # # This entry is not in the selection list. Do nothing # Check the input dataset consistency and save the result to a text file, if necessary. if args.check_consistency: # Create destination directory assert (not os.path.exists(dst_dir)) os.mkdir(dst_dir) assert (os.path.exists(dst_dir)) # Open the consistency check file mic_consistency_check_info_path = os.path.join( record_dir, "mic_consistency_check_info_%s.txt" % (get_time_stamp_suffix())) print(" ") print_progress( "Generating consistency report of the provided dataset in %s..." 
% (mic_consistency_check_info_path)) mic_consistency_check_info_file = open( mic_consistency_check_info_path, "w") mic_consistency_check_info_file.write( "# The consistency information about micrograph IDs that might have problmes with consistency among the provided dataset.\n" ) mic_consistency_check_info_file.write("# \n") # Loop over substring id list for mic_id_substr in global_entry_dict: mic_id_entry = global_entry_dict[mic_id_substr] consistency_messages = [] # Check if associated micrograph path exists in source directory if not subkey_src_mic_path in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) consistency_messages.append( " associated micrograph (%s) does not exist in the source directory (%s)." % (mic_basename, src_dir)) # Check if associated micrograph basename exists in selection list if not subkey_select_mic_basename in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) consistency_messages.append( " associated micrograph (%s) is not in the selection list (%s)." 
% (mic_basename, select_list_path)) if len(consistency_messages) > 0: mic_consistency_check_info_file.write( "Micrograph ID %s might have problems with consistency among the provided dataset:\n" % (mic_id_substr)) for consistency_message in consistency_messages: mic_consistency_check_info_file.write( consistency_message) mic_consistency_check_info_file.write("\n") # Close the consistency check file, if necessary mic_consistency_check_info_file.flush() mic_consistency_check_info_file.close() # Since mic_id_substr is once stored as the key of global_entry_dict and extracted with the key order # we need sort the valid_mic_id_substr_list here # print("MRK_DEBUG: before sort, valid_mic_id_substr_list := ", valid_mic_id_substr_list) valid_mic_id_substr_list.sort() # print("MRK_DEBUG: after sort, valid_mic_id_substr_list := ", valid_mic_id_substr_list) # -------------------------------------------------------------------------------- # Print out the summary of input consistency # -------------------------------------------------------------------------------- print(" ") print_progress("Summary of consistency check for provided dataset...") print_progress("Detected : %6d" % (len(global_entry_dict))) print_progress("Valid : %6d" % (len(valid_mic_id_substr_list))) print_progress("Rejected by no source micrographs : %6d" % (len(no_src_mic_id_substr_list))) print(" ") # -------------------------------------------------------------------------------- # Clean up variables related to tracking of invalid (rejected) micrographs # -------------------------------------------------------------------------------- del no_src_mic_id_substr_list else: assert (dst_mic_pattern is not None) assert (os.path.exists(dst_dir)) # Prepare lists to keep track of invalid (rejected) micrographs no_mic_in_both_dirs_id_substr_list = [] already_in_dst_dir_mic_id_substr_list = [] duplicated_in_dst_dir_mic_id_substr_list = [] # Loop over substring id list for mic_id_substr in global_entry_dict: 
mic_id_entry = global_entry_dict[mic_id_substr] warinnig_messages = [] # selected micrograph basename must have been registed always . if subkey_select_mic_basename in mic_id_entry: # Check if associated input micrograph exists if not subkey_src_mic_path in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) if not subkey_dst_mic_path in mic_id_entry: warinnig_messages.append( " associated micrograph (%s) does not exist neither in the source directory (%s) nor in the destination directory (%s)." % (mic_basename, src_dir, dst_dir)) no_mic_in_both_dirs_id_substr_list.append( mic_id_substr) else: assert (subkey_dst_mic_path in mic_id_entry) warinnig_messages.append( " associated micrograph (%s) exists only in the destination directory (%s), but not in the source directory (%s)." % (mic_basename, dst_dir, src_dir)) already_in_dst_dir_mic_id_substr_list.append( mic_id_substr) else: assert (subkey_src_mic_path in mic_id_entry) if subkey_dst_mic_path in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) warinnig_messages.append( " associated micrograph (%s) exist both in the source directory (%s) and in the destination directory (%s)." % (mic_basename, src_dir, dst_dir)) duplicated_in_dst_dir_mic_id_substr_list.append( mic_id_substr) # else: # # This should most typical case! # assert (not subkey_dst_mic_path in mic_id_entry) if len(warinnig_messages) > 0: print_progress( "WARNING!!! Micrograph ID %s has problems with consistency among the provided dataset:" % (mic_id_substr)) for warinnig_message in warinnig_messages: print_progress(warinnig_message) print_progress(" Ignores this as an invalid entry.") else: # print("MRK_DEBUG: adding mic_id_substr := ", mic_id_substr) valid_mic_id_substr_list.append(mic_id_substr) # else: # assert (not subkey_select_mic_basename in mic_id_entry) # # This entry is not in the selection list. 
Do nothing # Check the input dataset consistency and save the result to a text file, if necessary. if args.check_consistency: assert (os.path.exists(dst_dir)) # Open the consistency check file mic_consistency_check_info_path = os.path.join( record_dir, "mic_consistency_check_info_%s.txt" % (get_time_stamp_suffix())) print(" ") print_progress( "Generating consistency report of the provided dataset in %s..." % (mic_consistency_check_info_path)) mic_consistency_check_info_file = open( mic_consistency_check_info_path, "w") mic_consistency_check_info_file.write( "# The consistency information about micrograph IDs that might have problmes with consistency among the provided dataset.\n" ) mic_consistency_check_info_file.write("# \n") # Loop over substring id list for mic_id_substr in global_entry_dict: mic_id_entry = global_entry_dict[mic_id_substr] consistency_messages = [] # Check if associated micrograph path exists in source directory if not subkey_src_mic_path in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) consistency_messages.append( " associated micrograph (%s) does not exist in the source directory (%s)." % (mic_basename, src_dir)) # Check if associated micrograph basename exists in selection list if not subkey_select_mic_basename in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) consistency_messages.append( " associated micrograph (%s) is not in the selection list (%s)." % (mic_basename, select_list_path)) # Check if associated micrograph path does not exist in destination directory if subkey_dst_mic_path in mic_id_entry: mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) consistency_messages.append( " associated micrograph (%s) already exist in the destination directory (%s)." 
% (mic_basename, dst_dir)) if len(consistency_messages) > 0: mic_consistency_check_info_file.write( "Micrograph ID %s have inconsistency among provided dataset:\n" % (mic_id_substr)) for consistency_message in consistency_messages: mic_consistency_check_info_file.write( consistency_message) mic_consistency_check_info_file.write("\n") # Close the consistency check file, if necessary mic_consistency_check_info_file.flush() mic_consistency_check_info_file.close() # Since mic_id_substr is once stored as the key of global_entry_dict and extracted with the key order # we need sort the valid_mic_id_substr_list here # print("MRK_DEBUG: before sort, valid_mic_id_substr_list := ", valid_mic_id_substr_list) valid_mic_id_substr_list.sort() # print("MRK_DEBUG: after sort, valid_mic_id_substr_list := ", valid_mic_id_substr_list) # -------------------------------------------------------------------------------- # Print out the summary of input consistency # -------------------------------------------------------------------------------- print(" ") print_progress("Summary of dataset consistency check...") print_progress("Detected : %6d" % (len(global_entry_dict))) print_progress("Valid : %6d" % (len(valid_mic_id_substr_list))) print_progress("Rejected by not found in both dirs : %6d" % (len(no_mic_in_both_dirs_id_substr_list))) print_progress("Rejected by already in dst dir : %6d" % (len(already_in_dst_dir_mic_id_substr_list))) print_progress("Rejected by duplicated in dst dir : %6d" % (len(duplicated_in_dst_dir_mic_id_substr_list))) print(" ") # -------------------------------------------------------------------------------- # Save the list of duplicated_micrographs in duplicated_micrographs_DATE_TIME.txt # under destination directory if necessary # -------------------------------------------------------------------------------- if len(duplicated_in_dst_dir_mic_id_substr_list) > 0: duplicated_mic_list_path = os.path.join( record_dir, "duplicated_micrographs_%s.txt" % 
(get_time_stamp_suffix())) print_progress( "Storing the list of duplicated micrographs in %s." % (duplicated_mic_list_path)) print(" ") # Open the duplicated micrograph list file duplicated_mic_list_file = open(duplicated_mic_list_path, "w") for mic_id_substr in duplicated_in_dst_dir_mic_id_substr_list: duplicated_mic_basename = mic_basename_pattern.replace( "*", mic_id_substr) duplicated_mic_list_file.write(duplicated_mic_basename) duplicated_mic_list_file.write("\n") # Close the duplicated micrograph list file duplicated_mic_list_file.flush() duplicated_mic_list_file.close() # -------------------------------------------------------------------------------- # Clean up variables related to tracking of invalid (rejected) micrographs # -------------------------------------------------------------------------------- del no_mic_in_both_dirs_id_substr_list del already_in_dst_dir_mic_id_substr_list del duplicated_in_dst_dir_mic_id_substr_list # -------------------------------------------------------------------------------- # Create destination directory # -------------------------------------------------------------------------------- if not os.path.exists(dst_dir): print(" ") print_progress("Creating the destination directory (%)..." % (dst_dir)) os.mkdir(dst_dir) assert (os.path.exists(dst_dir)) # -------------------------------------------------------------------------------- # Move micrographs in selecting list form source directory to destination directory # -------------------------------------------------------------------------------- # Prepare the counters for the global summary of micrographs n_moved_mics = 0 if len(valid_mic_id_substr_list) > 0: print(" ") print_progress( "Moving micrographs in the selecting list (%s) from the source directory (%s) to the destination directory (%s)..." 
% (select_list_path, src_dir, dst_dir)) ### print("Micrographs processed (including percent of progress):") ### progress_percent_step = len(valid_mic_id_substr_list)*0.1 # Report every 10% of the number of micrograms # Loop over substring id list for mic_id_substr_idx, mic_id_substr in enumerate( valid_mic_id_substr_list): mic_id_entry = global_entry_dict[mic_id_substr] mic_basename = mic_id_entry[subkey_select_mic_basename] assert (mic_basename == mic_basename_pattern.replace( "*", mic_id_substr)) ### # Print out progress if necessary ### print("%s ---> % 2.2f%%" % (mic_basename, mic_id_substr_idx / progress_percent_step)) # At this point, this micrograph # - must exist in source directory # - must NOT exist in destination directory # because of the consistency check above assert (subkey_src_mic_path in mic_id_entry) assert (os.path.exists(mic_id_entry[subkey_src_mic_path])) assert (not os.path.exists(os.path.join(dst_dir, mic_basename))) # Move this micrograph from input directory to output directory src_mic_path = mic_id_entry[subkey_src_mic_path] shutil.move(src_mic_path, dst_dir) n_moved_mics += 1 # Print summary of processing print(" ") print_progress("Summary of processing...") print_progress("Moved : %6d" % (n_moved_mics)) print(" ")
def main(): import sys import os import math import random import pyemtbx.options import time from random import random, seed, randint from optparse import OptionParser progname = os.path.basename(sys.argv[0]) usage = progname + """ [options] <inputfile> <outputfile> Generic 2-D image processing programs. Functionality: 1. Phase flip a stack of images and write output to new file: sxprocess.py input_stack.hdf output_stack.hdf --phase_flip 2. Resample (decimate or interpolate up) images (2D or 3D) in a stack to change the pixel size. The window size will change accordingly. sxprocess input.hdf output.hdf --changesize --ratio=0.5 3. Compute average power spectrum of a stack of 2D images with optional padding (option wn) with zeroes. sxprocess.py input_stack.hdf powerspectrum.hdf --pw [--wn=1024] 4. Generate a stack of projections bdb:data and micrographs with prefix mic (i.e., mic0.hdf, mic1.hdf etc) from structure input_structure.hdf, with CTF applied to both projections and micrographs: sxprocess.py input_structure.hdf data mic --generate_projections format="bdb":apix=5.2:CTF=True:boxsize=64 5. Retrieve original image numbers in the selected ISAC group (here group 12 from generation 3): sxprocess.py bdb:test3 class_averages_generation_3.hdf list3_12.txt --isacgroup=12 --params=originalid 6. Retrieve original image numbers of images listed in ISAC output stack of averages: sxprocess.py select1.hdf ohk.txt 7. Adjust rotationally averaged power spectrum of an image to that of a reference image or a reference 1D power spectrum stored in an ASCII file. Optionally use a tangent low-pass filter. Also works for a stack of images, in which case the output is also a stack. sxprocess.py vol.hdf ref.hdf avol.hdf < 0.25 0.2> --adjpw sxprocess.py vol.hdf pw.txt avol.hdf < 0.25 0.2> --adjpw 8. Generate a 1D rotationally averaged power spectrum of an image. 
sxprocess.py vol.hdf --rotwp=rotpw.txt # Output will contain three columns: (1) rotationally averaged power spectrum (2) logarithm of the rotationally averaged power spectrum (3) integer line number (from zero to approximately to half the image size) 9. Apply 3D transformation (rotation and/or shift) to a set of orientation parameters associated with projection data. sxprocess.py --transfromparams=phi,theta,psi,tx,ty,tz input.txt output.txt The output file is then imported and 3D transformed volume computed: sxheader.py bdb:p --params=xform.projection --import=output.txt mpirun -np 2 sxrecons3d_n.py bdb:p tvol.hdf --MPI The reconstructed volume is in the position of the volume computed using the input.txt parameters and then transformed with rot_shift3D(vol, phi,theta,psi,tx,ty,tz) 10. Import ctf parameters from the output of sxcter into windowed particle headers. There are three possible input files formats: (1) all particles are in one stack, (2 aor 3) particles are in stacks, each stack corresponds to a single micrograph. In each case the particles should contain a name of the micrograph of origin stores using attribute name 'ptcl_source_image'. Normally this is done by e2boxer.py during windowing. Particles whose defocus or astigmatism error exceed set thresholds will be skipped, otherwise, virtual stacks with the original way preceded by G will be created. sxprocess.py --input=bdb:data --importctf=outdir/partres --defocuserror=10.0 --astigmatismerror=5.0 # Output will be a vritual stack bdb:Gdata sxprocess.py --input="bdb:directory/stacks*" --importctf=outdir/partres --defocuserror=10.0 --astigmatismerror=5.0 To concatenate output files: cd directory e2bdb.py . --makevstack=bdb:allparticles --filt=G IMPORTANT: Please do not move (or remove!) any input/intermediate EMAN2DB files as the information is linked between them. 11. Scale 3D shifts. 
The shifts in the input five columns text file with 3D orientation parameters will be DIVIDED by the scale factor sxprocess.py orientationparams.txt scaledparams.txt scale=0.5 12. Generate adaptive mask from a given 3-D volume. """ parser = OptionParser(usage, version=SPARXVERSION) parser.add_option( "--order", action="store_true", help= "Two arguments are required: name of input stack and desired name of output stack. The output stack is the input stack sorted by similarity in terms of cross-correlation coefficent.", default=False) parser.add_option("--order_lookup", action="store_true", help="Test/Debug.", default=False) parser.add_option("--order_metropolis", action="store_true", help="Test/Debug.", default=False) parser.add_option("--order_pca", action="store_true", help="Test/Debug.", default=False) parser.add_option( "--initial", type="int", default=-1, help= "Specifies which image will be used as an initial seed to form the chain. (default = 0, means the first image)" ) parser.add_option( "--circular", action="store_true", help= "Select circular ordering (fisr image has to be similar to the last", default=False) parser.add_option( "--radius", type="int", default=-1, help="Radius of a circular mask for similarity based ordering") parser.add_option( "--changesize", action="store_true", help= "resample (decimate or interpolate up) images (2D or 3D) in a stack to change the pixel size.", default=False) parser.add_option( "--ratio", type="float", default=1.0, help= "The ratio of new to old image size (if <1 the pixel size will increase and image size decrease, if>1, the other way round" ) parser.add_option( "--pw", action="store_true", help= "compute average power spectrum of a stack of 2-D images with optional padding (option wn) with zeroes", default=False) parser.add_option( "--wn", type="int", default=-1, help= "Size of window to use (should be larger/equal than particle box size, default padding to max(nx,ny))" ) parser.add_option("--phase_flip", 
action="store_true", help="Phase flip the input stack", default=False) parser.add_option( "--makedb", metavar="param1=value1:param2=value2", type="string", action="append", help= "One argument is required: name of key with which the database will be created. Fill in database with parameters specified as follows: --makedb param1=value1:param2=value2, e.g. 'gauss_width'=1.0:'pixel_input'=5.2:'pixel_output'=5.2:'thr_low'=1.0" ) parser.add_option( "--generate_projections", metavar="param1=value1:param2=value2", type="string", action="append", help= "Three arguments are required: name of input structure from which to generate projections, desired name of output projection stack, and desired prefix for micrographs (e.g. if prefix is 'mic', then micrographs mic0.hdf, mic1.hdf etc will be generated). Optional arguments specifying format, apix, box size and whether to add CTF effects can be entered as follows after --generate_projections: format='bdb':apix=5.2:CTF=True:boxsize=100, or format='hdf', etc., where format is bdb or hdf, apix (pixel size) is a float, CTF is True or False, and boxsize denotes the dimension of the box (assumed to be a square). If an optional parameter is not specified, it will default as follows: format='bdb', apix=2.5, CTF=False, boxsize=64." ) parser.add_option( "--isacgroup", type="int", help= "Retrieve original image numbers in the selected ISAC group. See ISAC documentation for details.", default=-1) parser.add_option( "--isacselect", action="store_true", help= "Retrieve original image numbers of images listed in ISAC output stack of averages. 
See ISAC documentation for details.", default=False) parser.add_option( "--params", type="string", default=None, help="Name of header of parameter, which one depends on specific option" ) parser.add_option( "--adjpw", action="store_true", help="Adjust rotationally averaged power spectrum of an image", default=False) parser.add_option( "--rotpw", type="string", default=None, help= "Name of the text file to contain rotationally averaged power spectrum of the input image." ) parser.add_option( "--transformparams", type="string", default=None, help= "Transform 3D projection orientation parameters using six 3D parameters (phi, theta,psi,sx,sy,sz). Input: --transformparams=45.,66.,12.,-2,3,-5.5 desired six transformation of the reconstructed structure. Output: file with modified orientation parameters." ) # import ctf estimates done using cter parser.add_option("--input", type="string", default=None, help="Input particles.") parser.add_option( "--importctf", type="string", default=None, help="Name of the file containing CTF parameters produced by sxcter.") parser.add_option( "--defocuserror", type="float", default=1000000.0, help= "Exclude micrographs whose relative defocus error as estimated by sxcter is larger than defocuserror percent. The error is computed as (std dev defocus)/defocus*100%" ) parser.add_option( "--astigmatismerror", type="float", default=360.0, help= "Set to zero astigmatism for micrographs whose astigmatism angular error as estimated by sxcter is larger than astigmatismerror degrees." ) # import ctf estimates done using cter parser.add_option( "--scale", type="float", default=-1.0, help= "Divide shifts in the input 3D orientation parameters text file by the scale factor." 
) # generate adaptive mask from an given 3-Db volue parser.add_option("--adaptive_mask", action="store_true", help="create adavptive 3-D mask from a given volume", default=False) parser.add_option( "--nsigma", type="float", default=1., help= "number of times of sigma of the input volume to obtain the the large density cluster" ) parser.add_option( "--ndilation", type="int", default=3, help= "number of times of dilation applied to the largest cluster of density" ) parser.add_option( "--kernel_size", type="int", default=11, help="convolution kernel for smoothing the edge of the mask") parser.add_option( "--gauss_standard_dev", type="int", default=9, help="stanadard deviation value to generate Gaussian edge") (options, args) = parser.parse_args() global_def.BATCH = True if options.phase_flip: nargs = len(args) if nargs != 2: print "must provide name of input and output file!" return from EMAN2 import Processor instack = args[0] outstack = args[1] nima = EMUtil.get_image_count(instack) from filter import filt_ctf for i in xrange(nima): img = EMData() img.read_image(instack, i) try: ctf = img.get_attr('ctf') except: print "no ctf information in input stack! Exiting..." 
return dopad = True sign = 1 binary = 1 # phase flip assert img.get_ysize() > 1 dict = ctf.to_dict() dz = dict["defocus"] cs = dict["cs"] voltage = dict["voltage"] pixel_size = dict["apix"] b_factor = dict["bfactor"] ampcont = dict["ampcont"] dza = dict["dfdiff"] azz = dict["dfang"] if dopad and not img.is_complex(): ip = 1 else: ip = 0 params = { "filter_type": Processor.fourier_filter_types.CTF_, "defocus": dz, "Cs": cs, "voltage": voltage, "Pixel_size": pixel_size, "B_factor": b_factor, "amp_contrast": ampcont, "dopad": ip, "binary": binary, "sign": sign, "dza": dza, "azz": azz } tmp = Processor.EMFourierFilter(img, params) tmp.set_attr_dict({"ctf": ctf}) tmp.write_image(outstack, i) elif options.changesize: nargs = len(args) if nargs != 2: ERROR("must provide name of input and output file!", "change size", 1) return from utilities import get_im instack = args[0] outstack = args[1] sub_rate = float(options.ratio) nima = EMUtil.get_image_count(instack) from fundamentals import resample for i in xrange(nima): resample(get_im(instack, i), sub_rate).write_image(outstack, i) elif options.isacgroup > -1: nargs = len(args) if nargs != 3: ERROR("Three files needed on input!", "isacgroup", 1) return from utilities import get_im instack = args[0] m = get_im(args[1], int(options.isacgroup)).get_attr("members") l = [] for k in m: l.append(int(get_im(args[0], k).get_attr(options.params))) from utilities import write_text_file write_text_file(l, args[2]) elif options.isacselect: nargs = len(args) if nargs != 2: ERROR("Two files needed on input!", "isacgroup", 1) return from utilities import get_im nima = EMUtil.get_image_count(args[0]) m = [] for k in xrange(nima): m += get_im(args[0], k).get_attr("members") m.sort() from utilities import write_text_file write_text_file(m, args[1]) elif options.pw: nargs = len(args) if nargs < 2: ERROR("must provide name of input and output file!", "pw", 1) return from utilities import get_im d = get_im(args[0]) nx = d.get_xsize() ny = 
d.get_ysize() if nargs == 3: mask = get_im(args[2]) wn = int(options.wn) if wn == -1: wn = max(nx, ny) else: if ((wn < nx) or (wn < ny)): ERROR("window size cannot be smaller than the image size", "pw", 1) n = EMUtil.get_image_count(args[0]) from utilities import model_blank, model_circle, pad from EMAN2 import periodogram p = model_blank(wn, wn) for i in xrange(n): d = get_im(args[0], i) if nargs == 3: d *= mask st = Util.infomask(d, None, True) d -= st[0] p += periodogram(pad(d, wn, wn, 1, 0.)) p /= n p.write_image(args[1]) elif options.adjpw: if len(args) < 3: ERROR( "filt_by_rops input target output fl aa (the last two are optional parameters of a low-pass filter)", "adjpw", 1) return img_stack = args[0] from math import sqrt from fundamentals import rops_table, fft from utilities import read_text_file, get_im from filter import filt_tanl, filt_table if (args[1][-3:] == 'txt'): rops_dst = read_text_file(args[1]) else: rops_dst = rops_table(get_im(args[1])) out_stack = args[2] if (len(args) > 4): fl = float(args[3]) aa = float(args[4]) else: fl = -1.0 aa = 0.0 nimage = EMUtil.get_image_count(img_stack) for i in xrange(nimage): img = fft(get_im(img_stack, i)) rops_src = rops_table(img) assert len(rops_dst) == len(rops_src) table = [0.0] * len(rops_dst) for j in xrange(len(rops_dst)): table[j] = sqrt(rops_dst[j] / rops_src[j]) if (fl > 0.0): img = filt_tanl(img, fl, aa) img = fft(filt_table(img, table)) img.write_image(out_stack, i) elif options.rotpw != None: if len(args) != 1: ERROR("Only one input permitted", "rotpw", 1) return from utilities import write_text_file, get_im from fundamentals import rops_table from math import log10 t = rops_table(get_im(args[0])) x = range(len(t)) r = [0.0] * len(x) for i in x: r[i] = log10(t[i]) write_text_file([t, r, x], options.rotpw) elif options.transformparams != None: if len(args) != 2: ERROR( "Please provide names of input and output files with orientation parameters", "transformparams", 1) return from utilities import 
read_text_row, write_text_row transf = [0.0] * 6 spl = options.transformparams.split(',') for i in xrange(len(spl)): transf[i] = float(spl[i]) write_text_row(rotate_shift_params(read_text_row(args[0]), transf), args[1]) elif options.makedb != None: nargs = len(args) if nargs != 1: print "must provide exactly one argument denoting database key under which the input params will be stored" return dbkey = args[0] print "database key under which params will be stored: ", dbkey gbdb = js_open_dict("e2boxercache/gauss_box_DB.json") parmstr = 'dummy:' + options.makedb[0] (processorname, param_dict) = parsemodopt(parmstr) dbdict = {} for pkey in param_dict: if (pkey == 'invert_contrast') or (pkey == 'use_variance'): if param_dict[pkey] == 'True': dbdict[pkey] = True else: dbdict[pkey] = False else: dbdict[pkey] = param_dict[pkey] gbdb[dbkey] = dbdict elif options.generate_projections: nargs = len(args) if nargs != 3: ERROR("Must provide name of input structure(s) from which to generate projections, name of output projection stack, and prefix for output micrographs."\ "sxprocess - generate projections",1) return inpstr = args[0] outstk = args[1] micpref = args[2] parmstr = 'dummy:' + options.generate_projections[0] (processorname, param_dict) = parsemodopt(parmstr) parm_CTF = False parm_format = 'bdb' parm_apix = 2.5 if 'CTF' in param_dict: if param_dict['CTF'] == 'True': parm_CTF = True if 'format' in param_dict: parm_format = param_dict['format'] if 'apix' in param_dict: parm_apix = float(param_dict['apix']) boxsize = 64 if 'boxsize' in param_dict: boxsize = int(param_dict['boxsize']) print "pixel size: ", parm_apix, " format: ", parm_format, " add CTF: ", parm_CTF, " box size: ", boxsize scale_mult = 2500 sigma_add = 1.5 sigma_proj = 30.0 sigma2_proj = 17.5 sigma_gauss = 0.3 sigma_mic = 30.0 sigma2_mic = 17.5 sigma_gauss_mic = 0.3 if 'scale_mult' in param_dict: scale_mult = float(param_dict['scale_mult']) if 'sigma_add' in param_dict: sigma_add = 
float(param_dict['sigma_add']) if 'sigma_proj' in param_dict: sigma_proj = float(param_dict['sigma_proj']) if 'sigma2_proj' in param_dict: sigma2_proj = float(param_dict['sigma2_proj']) if 'sigma_gauss' in param_dict: sigma_gauss = float(param_dict['sigma_gauss']) if 'sigma_mic' in param_dict: sigma_mic = float(param_dict['sigma_mic']) if 'sigma2_mic' in param_dict: sigma2_mic = float(param_dict['sigma2_mic']) if 'sigma_gauss_mic' in param_dict: sigma_gauss_mic = float(param_dict['sigma_gauss_mic']) from filter import filt_gaussl, filt_ctf from utilities import drop_spider_doc, even_angles, model_gauss, delete_bdb, model_blank, pad, model_gauss_noise, set_params2D, set_params_proj from projection import prep_vol, prgs seed(14567) delta = 29 angles = even_angles(delta, 0.0, 89.9, 0.0, 359.9, "S") nangle = len(angles) modelvol = [] nvlms = EMUtil.get_image_count(inpstr) from utilities import get_im for k in xrange(nvlms): modelvol.append(get_im(inpstr, k)) nx = modelvol[0].get_xsize() if nx != boxsize: ERROR("Requested box dimension does not match dimension of the input model.", \ "sxprocess - generate projections",1) nvol = 10 volfts = [[] for k in xrange(nvlms)] for k in xrange(nvlms): for i in xrange(nvol): sigma = sigma_add + random() # 1.5-2.5 addon = model_gauss(sigma, boxsize, boxsize, boxsize, sigma, sigma, 38, 38, 40) scale = scale_mult * (0.5 + random()) vf, kb = prep_vol(modelvol[k] + scale * addon) volfts[k].append(vf) del vf, modelvol if parm_format == "bdb": stack_data = "bdb:" + outstk delete_bdb(stack_data) else: stack_data = outstk + ".hdf" Cs = 2.0 pixel = parm_apix voltage = 120.0 ampcont = 10.0 ibd = 4096 / 2 - boxsize iprj = 0 width = 240 xstart = 8 + boxsize / 2 ystart = 8 + boxsize / 2 rowlen = 17 from random import randint params = [] for idef in xrange(3, 8): irow = 0 icol = 0 mic = model_blank(4096, 4096) defocus = idef * 0.5 #0.2 if parm_CTF: astampl = defocus * 0.15 astangl = 50.0 ctf = generate_ctf([ defocus, Cs, voltage, pixel, ampcont, 
0.0, astampl, astangl ]) for i in xrange(nangle): for k in xrange(12): dphi = 8.0 * (random() - 0.5) dtht = 8.0 * (random() - 0.5) psi = 360.0 * random() phi = angles[i][0] + dphi tht = angles[i][1] + dtht s2x = 4.0 * (random() - 0.5) s2y = 4.0 * (random() - 0.5) params.append([phi, tht, psi, s2x, s2y]) ivol = iprj % nvol #imgsrc = randint(0,nvlms-1) imgsrc = iprj % nvlms proj = prgs(volfts[imgsrc][ivol], kb, [phi, tht, psi, -s2x, -s2y]) x = xstart + irow * width y = ystart + icol * width mic += pad(proj, 4096, 4096, 1, 0.0, x - 2048, y - 2048, 0) proj = proj + model_gauss_noise(sigma_proj, nx, nx) if parm_CTF: proj = filt_ctf(proj, ctf) proj.set_attr_dict({"ctf": ctf, "ctf_applied": 0}) proj = proj + filt_gaussl( model_gauss_noise(sigma2_proj, nx, nx), sigma_gauss) proj.set_attr("origimgsrc", imgsrc) proj.set_attr("test_id", iprj) # flags describing the status of the image (1 = true, 0 = false) set_params2D(proj, [0.0, 0.0, 0.0, 0, 1.0]) set_params_proj(proj, [phi, tht, psi, s2x, s2y]) proj.write_image(stack_data, iprj) icol += 1 if icol == rowlen: icol = 0 irow += 1 iprj += 1 mic += model_gauss_noise(sigma_mic, 4096, 4096) if parm_CTF: #apply CTF mic = filt_ctf(mic, ctf) mic += filt_gaussl(model_gauss_noise(sigma2_mic, 4096, 4096), sigma_gauss_mic) mic.write_image(micpref + "%1d.hdf" % (idef - 3), 0) drop_spider_doc("params.txt", params) elif options.importctf != None: print ' IMPORTCTF ' from utilities import read_text_row, write_text_row from random import randint import subprocess grpfile = 'groupid%04d' % randint(1000, 9999) ctfpfile = 'ctfpfile%04d' % randint(1000, 9999) cterr = [options.defocuserror / 100.0, options.astigmatismerror] ctfs = read_text_row(options.importctf) for kk in xrange(len(ctfs)): root, name = os.path.split(ctfs[kk][-1]) ctfs[kk][-1] = name[:-4] if (options.input[:4] != 'bdb:'): ERROR('Sorry, only bdb files implemented', 'importctf', 1) d = options.input[4:] #try: str = d.index('*') #except: str = -1 from string import split import glob 
uu = os.path.split(d) uu = os.path.join(uu[0], 'EMAN2DB', uu[1] + '.bdb') flist = glob.glob(uu) for i in xrange(len(flist)): root, name = os.path.split(flist[i]) root = root[:-7] name = name[:-4] fil = 'bdb:' + os.path.join(root, name) sourcemic = EMUtil.get_all_attributes(fil, 'ptcl_source_image') nn = len(sourcemic) gctfp = [] groupid = [] for kk in xrange(nn): junk, name2 = os.path.split(sourcemic[kk]) name2 = name2[:-4] ctfp = [-1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] for ll in xrange(len(ctfs)): if (name2 == ctfs[ll][-1]): # found correct if (ctfs[ll][8] / ctfs[ll][0] <= cterr[0]): # acceptable defocus error ctfp = ctfs[ll][:8] if (ctfs[ll][10] > cterr[1]): # error of astigmatism exceed the threshold, set astigmatism to zero. ctfp[6] = 0.0 ctfp[7] = 0.0 gctfp.append(ctfp) groupid.append(kk) break if (len(groupid) > 0): write_text_row(groupid, grpfile) write_text_row(gctfp, ctfpfile) cmd = "{} {} {} {}".format( 'e2bdb.py', fil, '--makevstack=bdb:' + root + 'G' + name, '--list=' + grpfile) #print cmd subprocess.call(cmd, shell=True) cmd = "{} {} {} {}".format('sxheader.py', 'bdb:' + root + 'G' + name, '--params=ctf', '--import=' + ctfpfile) #print cmd subprocess.call(cmd, shell=True) else: print ' >>> Group ', name, ' skipped.' cmd = "{} {} {}".format("rm -f", grpfile, ctfpfile) subprocess.call(cmd, shell=True) elif options.scale > 0.0: from utilities import read_text_row, write_text_row scale = options.scale nargs = len(args) if nargs != 2: print "Please provide names of input and output file!" return p = read_text_row(args[0]) for i in xrange(len(p)): p[i][3] /= scale p[i][4] /= scale write_text_row(p, args[1]) elif options.adaptive_mask: from utilities import get_im from morphology import adaptive_mask nsigma = options.nsigma ndilation = options.ndilation kernel_size = options.kernel_size gauss_standard_dev = options.gauss_standard_dev nargs = len(args) if nargs > 2: print "Too many inputs are given, try again!" 
return else: inputvol = get_im(args[0]) input_path, input_file_name = os.path.split(args[0]) input_file_name_root, ext = os.path.splitext(input_file_name) if nargs == 2: mask_file_name = args[1] else: mask_file_name = "adaptive_mask_for" + input_file_name_root + ".hdf" # Only hdf file is output. mask3d = adaptive_mask(inputvol, nsigma, ndilation, kernel_size, gauss_standard_dev) mask3d.write_image(mask_file_name) else: ERROR("Please provide option name", "sxprocess.py", 1)