def get_image_size(imgdata, myid):
    from mpi import mpi_gather, mpi_bcast, MPI_COMM_WORLD, MPI_INT
    nimg = len(imgdata)
    nimgs = mpi_gather(nimg, 1, MPI_INT, 1, MPI_INT, 0, MPI_COMM_WORLD)
    if myid == 0:
        # find the first rank that holds at least one image
        src = -1
        for i in xrange(len(nimgs)):
            if int(nimgs[i]) > 0:
                src = i
                break
        if src == -1:
            return 0
    else:
        src = -1
    size_src = mpi_bcast(src, 1, MPI_INT, 0, MPI_COMM_WORLD)
    if myid == int(size_src[0]):
        assert nimg > 0
        size = imgdata[0].get_xsize()
    else:
        size = -1
    # broadcast the image size from the first rank that has data
    nx = mpi_bcast(size, 1, MPI_INT, size_src[0], MPI_COMM_WORLD)
    return int(nx[0])
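
# Illustrative usage sketch (not part of the original SPARX source): get_image_size
# is a collective call, so every rank must enter it, including ranks whose image list
# is empty. The stack file name passed in is a hypothetical example.
def _example_get_image_size(stack):
    from mpi import mpi_comm_rank, MPI_COMM_WORLD
    from EMAN2 import EMData
    myid = mpi_comm_rank(MPI_COMM_WORLD)
    # here only rank 0 reads the data; the other ranks pass an empty list
    imgdata = EMData.read_images(stack) if myid == 0 else []
    # every rank gets the same x-size back (0 if no rank holds any image)
    return get_image_size(imgdata, myid)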
def identify_outliers(myid, main_node, rviper_iter, no_of_viper_runs_analyzed_together,
    no_of_viper_runs_analyzed_together_from_user_options, masterdir, bdb_stack_location,
    outlier_percentile, criterion_name, outlier_index_threshold_method, angle_threshold):

    no_of_viper_runs_analyzed_together_must_be_incremented = 0

    do_calculation = 1
    if (myid == main_node):
        mainoutputdir = masterdir + DIR_DELIM + NAME_OF_MAIN_DIR + ("%03d" + DIR_DELIM) % (rviper_iter)
        if (os.path.exists(mainoutputdir + DIR_DELIM + "list_of_viper_runs_included_in_outlier_elimination.json")):
            # list_of_independent_viper_run_indices_used_for_outlier_elimination = map(int, read_text_file(mainoutputdir + DIR_DELIM + "list_of_viper_runs_included_in_outlier_elimination.txt"))
            import json
            f = open(mainoutputdir + "list_of_viper_runs_included_in_outlier_elimination.json", 'r')
            list_of_independent_viper_run_indices_used_for_outlier_elimination = json.load(f)
            f.close()
            do_calculation = 0
        do_calculation = mpi_bcast(do_calculation, 1, MPI_INT, 0, MPI_COMM_WORLD)[0]
    else:
        do_calculation = mpi_bcast(do_calculation, 1, MPI_INT, 0, MPI_COMM_WORLD)[0]

    if do_calculation:
        list_of_independent_viper_run_indices_used_for_outlier_elimination = \
            calculate_list_of_independent_viper_run_indices_used_for_outlier_elimination(
                no_of_viper_runs_analyzed_together,
                no_of_viper_runs_analyzed_together_from_user_options,
                masterdir, rviper_iter, criterion_name)

    # only master has the actual list: list_of_independent_viper_run_indices_used_for_outlier_elimination
    error_status = 0
    if (myid == main_node):
        # if len(list_of_independent_viper_run_indices_used_for_outlier_elimination) == 0:
        if list_of_independent_viper_run_indices_used_for_outlier_elimination[0] == EMPTY_VIPER_RUN_INDICES_LIST:
            if no_of_viper_runs_analyzed_together > MAXIMUM_NO_OF_VIPER_RUNS_ANALYZED_TOGETHER:
                error_status = 1
                print "RVIPER reached maximum number of VIPER runs analyzed together without finding a core set of stable projections for the current RVIPER iteration (%d)! Finishing." % rviper_iter
                cmd = "{} {}".format("mkdir ", masterdir + "MAXIMUM_NO_OF_VIPER_RUNS_ANALYZED_TOGETHER__Reached")
                cmdexecute(cmd)
            else:
                # No set of solutions has been found to make a selection for outlier elimination.
                # A new independent viper run will be performed.
                no_of_viper_runs_analyzed_together_must_be_incremented = 1
                cmd = "{} {}".format("rm ", mainoutputdir + "list_of_viper_runs_included_in_outlier_elimination.json")
                cmdexecute(cmd)
        else:
            # Outliers are eliminated based on the viper runs contained in
            # "list_of_independent_viper_run_indices_used_for_outlier_elimination"
            if list_of_independent_viper_run_indices_used_for_outlier_elimination[0] == MUST_END_PROGRAM_THIS_ITERATION:
                no_of_viper_runs_analyzed_together_must_be_incremented = MUST_END_PROGRAM_THIS_ITERATION
                found_outliers(list_of_independent_viper_run_indices_used_for_outlier_elimination[1:], outlier_percentile, rviper_iter, masterdir, bdb_stack_location, "use all images", angle_threshold)
            else:
                # still need to eliminate DUMMY_INDEX_USED_AS_BUFFER
                found_outliers(list_of_independent_viper_run_indices_used_for_outlier_elimination[1:], outlier_percentile, rviper_iter, masterdir, bdb_stack_location, outlier_index_threshold_method, angle_threshold)

    if_error_then_all_processes_exit_program(error_status)

    no_of_viper_runs_analyzed_together_must_be_incremented = mpi_bcast(no_of_viper_runs_analyzed_together_must_be_incremented, 1, MPI_INT, 0, MPI_COMM_WORLD)[0]
    return no_of_viper_runs_analyzed_together_must_be_incremented
def mpi_bcast_recv(src):
    """Unlike the C routine, in this python module, mpi_bcast is split into a send and a
    receive method. Send must be called on exactly one core, and receive called on all of
    the others. This routine also coordinates transfer of variable length objects.
    src is the rank of the sender."""
    from mpi import mpi_bcast, MPI_CHAR, MPI_COMM_WORLD
    from struct import unpack
    from cPickle import loads
    # first receive the 4-byte length of the pickled payload, then the payload itself
    l = mpi_bcast(None, 4, MPI_CHAR, src, MPI_COMM_WORLD)
    l = unpack("I", l)[0]
    data = mpi_bcast(None, l, MPI_CHAR, src, MPI_COMM_WORLD)
    return loads(str(data.data))
def mpi_bcast_send(data):
    """Unlike the C routine, in this python module, mpi_bcast is split into a send and a
    receive method. Send must be called on exactly one core, and receive called on all of
    the others. This routine also coordinates transfer of variable length objects."""
    from mpi import mpi_comm_rank, mpi_bcast, MPI_CHAR, MPI_COMM_WORLD
    from struct import pack
    from cPickle import dumps
    data = dumps(data, -1)
    l = pack("I", len(data))
    rank = mpi_comm_rank(MPI_COMM_WORLD)
    # broadcast the payload length first, then the pickled payload itself
    mpi_bcast(l, len(l), MPI_CHAR, rank, MPI_COMM_WORLD)
    mpi_bcast(data, len(data), MPI_CHAR, rank, MPI_COMM_WORLD)
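
# Illustrative usage sketch (not part of the original SPARX source): the two halves
# of the split broadcast above must be paired across ranks, with exactly one rank
# sending and all others receiving. "myid" is assumed to hold the caller's MPI rank.
def _example_split_bcast(myid):
    if myid == 0:
        # any picklable object works; the length is transferred automatically
        params = {"lowpass": 0.25, "radius": 29}
        mpi_bcast_send(params)
    else:
        params = mpi_bcast_recv(0)  # 0 is the sender's rank
    return params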
def if_error_then_all_processes_exit_program(error_status):
    import sys, os
    from utilities import print_msg

    if "OMPI_COMM_WORLD_SIZE" not in os.environ:
        # not running under MPI: provide trivial stand-ins so the same code path works
        def mpi_comm_rank(n): return 0
        def mpi_bcast(*largs): return [largs[0]]
        def mpi_finalize(): return None
        MPI_INT, MPI_COMM_WORLD = 0, 0
    else:
        from mpi import mpi_comm_rank, mpi_bcast, mpi_finalize, MPI_INT, MPI_COMM_WORLD

    myid = mpi_comm_rank(MPI_COMM_WORLD)
    if error_status != None and error_status != 0:
        error_status_info = error_status
        error_status = 1
    else:
        error_status = 0

    error_status = mpi_bcast(error_status, 1, MPI_INT, 0, MPI_COMM_WORLD)
    error_status = int(error_status[0])

    if error_status > 0:
        if myid == 0:
            if type(error_status_info) == type((1, 1)):
                if len(error_status_info) == 2:
                    frameinfo = error_status_info[1]
                    print_msg("***********************************\n")
                    print_msg("** Error: %s\n" % error_status_info[0])
                    print_msg("***********************************\n")
                    print_msg("** Location: %s\n" % (frameinfo.filename + ":" + str(frameinfo.lineno)))
                    print_msg("***********************************\n")
        sys.stdout.flush()
        mpi_finalize()
        sys.exit(1)
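
# Illustrative usage sketch (not part of the original SPARX source), mirroring the
# commented-out check in the rviper main() below: pass 0/None on ranks that saw no
# problem and a (message, frameinfo) tuple on the rank that did; every rank must make
# the call. Note that, as written above, only an error raised on rank 0 is broadcast.
# "stack_path" is a hypothetical argument.
def _example_collective_error_exit(myid, stack_path):
    import os
    from inspect import currentframe, getframeinfo
    error_status = None
    if myid == 0 and not os.path.exists(stack_path):
        error_status = ("input stack %s is missing" % stack_path, getframeinfo(currentframe()))
    if_error_then_all_processes_exit_program(error_status)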
def main():
    from logger import Logger, BaseLogger_Files
    arglist = []
    i = 0
    # strip MPI bootstrap arguments (-p4pg/-p4wd and their values)
    while (i < len(sys.argv)):
        if sys.argv[i] == '-p4pg':
            i = i + 2
        elif sys.argv[i] == '-p4wd':
            i = i + 2
        else:
            arglist.append(sys.argv[i])
            i = i + 1
    progname = os.path.basename(arglist[0])
    usage = progname + " stack outdir <mask> --focus=3Dmask --radius=outer_radius --delta=angular_step" + \
        " --an=angular_neighborhood --maxit=max_iter --CTF --sym=c1 --function=user_function --independent=independent_runs --number_of_images_per_group=number_of_images_per_group --low_pass_filter=.25 --seed=random_seed"
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option("--focus", type="string", default='', help="binary 3D mask for focused clustering")
    parser.add_option("--ir", type="int", default=1, help="inner radius for rotational correlation > 0 (set to 1)")
    parser.add_option("--radius", type="int", default=-1, help="particle radius in pixel for rotational correlation < nx-1 (set to the radius of the particle)")
    parser.add_option("--maxit", type="int", default=25, help="maximum number of iterations")
    parser.add_option("--rs", type="int", default=1, help="step between rings in rotational correlation > 0 (set to 1)")
    parser.add_option("--xr", type="string", default='1', help="range for translation search in x direction, search is +/-xr")
    parser.add_option("--yr", type="string", default='-1', help="range for translation search in y direction, search is +/-yr (default = same as xr)")
    parser.add_option("--ts", type="string", default='0.25', help="step size of the translation search in both directions, search is -xr, -xr+ts, 0, xr-ts, xr")
    parser.add_option("--delta", type="string", default='2', help="angular step of reference projections")
    parser.add_option("--an", type="string", default='-1', help="angular neighborhood for local searches")
    parser.add_option("--center", type="int", default=0, help="0 - if you do not want the volume to be centered, 1 - center the volume using cog (default=0)")
    parser.add_option("--nassign", type="int", default=1, help="number of reassignment iterations performed for each angular step (set to 3)")
    parser.add_option("--nrefine", type="int", default=0, help="number of alignment iterations performed for each angular step (set to 0)")
    parser.add_option("--CTF", action="store_true", default=False, help="do CTF correction during clustering")
    parser.add_option("--stoprnct", type="float", default=3.0, help="Minimum percentage of assignment change to stop the program")
    parser.add_option("--sym", type="string", default='c1', help="symmetry of the structure")
    parser.add_option("--function", type="string", default='do_volume_mrk05', help="name of the reference preparation function")
    parser.add_option("--independent", type="int", default=3, help="number of independent runs")
    parser.add_option("--number_of_images_per_group", type="int", default=1000, help="number of images per group")
    parser.add_option("--low_pass_filter", type="float", default=-1.0, help="absolute frequency of low-pass filter for 3d sorting on the original image size")
    parser.add_option("--nxinit", type="int", default=64, help="initial image size for sorting")
    parser.add_option("--unaccounted", action="store_true", default=False, help="reconstruct the unaccounted images")
    parser.add_option("--seed", type="int", default=-1, help="random seed for creating initial random assignment for EQ Kmeans")
    parser.add_option("--smallest_group", type="int", default=500, help="minimum members for identified group")
    parser.add_option("--sausage", action="store_true", default=False, help="way of filter volume")
    parser.add_option("--chunkdir", type="string", default='', help="chunkdir for computing margin of error")
    parser.add_option("--PWadjustment", type="string", default='', help="1-D power spectrum of PDB file used for EM volume power spectrum correction")
    parser.add_option("--protein_shape", type="string", default='g', help="protein shape. It defines protein preferred orientation angles. Currently it has two types: g and f")
    parser.add_option("--upscale", type="float", default=0.5, help="scaling parameter to adjust the power spectrum of EM volumes")
    parser.add_option("--wn", type="int", default=0, help="optimal window size for data processing")
    parser.add_option("--interpolation", type="string", default="4nn", help="3-d reconstruction interpolation method, two options: trl and 4nn")
    (options, args) = parser.parse_args(arglist[1:])

    if len(args) < 1 or len(args) > 4:
        print "usage: " + usage
        print "Please run '" + progname + " -h' for detailed options"
    else:
        if len(args) > 2:
            mask_file = args[2]
        else:
            mask_file = None
        orgstack = args[0]
        masterdir = args[1]
        global_def.BATCH = True

        # --- initialize MPI related variables
        from mpi import mpi_init, mpi_comm_size, MPI_COMM_WORLD, mpi_comm_rank, mpi_barrier, mpi_bcast, mpi_finalize, MPI_INT, MPI_CHAR
        sys.argv = mpi_init(len(sys.argv), sys.argv)
        nproc = mpi_comm_size(MPI_COMM_WORLD)
        myid = mpi_comm_rank(MPI_COMM_WORLD)
        mpi_comm = MPI_COMM_WORLD
        main_node = 0

        # import some utilities
        from utilities import get_im, bcast_number_to_all, cmdexecute, write_text_file, read_text_file, wrap_mpi_bcast, get_params_proj, write_text_row
        from applications import recons3d_n_MPI, mref_ali3d_MPI, Kmref_ali3d_MPI
        from statistics import k_means_match_clusters_asg_new, k_means_stab_bbenum
        from applications import mref_ali3d_EQ_Kmeans, ali3d_mref_Kmeans_MPI
        from reconstruction import recons3d_4nn_ctf_MPI  # reconstruction routine used throughout below

        # Create the main log file
        from logger import Logger, BaseLogger_Files
        if myid == main_node:
            log_main = Logger(BaseLogger_Files())
            log_main.prefix = masterdir + "/"
        else:
            log_main = None

        # --- fill input parameters into dictionary named Constants
        Constants = {}
        Constants["stack"] = args[0]
        Constants["masterdir"] = masterdir
        Constants["mask3D"] = mask_file
        Constants["focus3Dmask"] = options.focus
        Constants["indep_runs"] = options.independent
        Constants["stoprnct"] = options.stoprnct
        Constants["number_of_images_per_group"] = options.number_of_images_per_group
        Constants["CTF"] = options.CTF
        Constants["maxit"] = options.maxit
        Constants["ir"] = options.ir
        Constants["radius"] = options.radius
        Constants["nassign"] = options.nassign
        Constants["rs"] = options.rs
        Constants["xr"] = options.xr
        Constants["yr"] = options.yr
        Constants["ts"] = options.ts
        Constants["delta"] = options.delta
        Constants["an"] = options.an
        Constants["sym"] = options.sym
        Constants["center"] = options.center
        Constants["nrefine"] = options.nrefine
        #Constants["fourvar"] = options.fourvar
        Constants["user_func"] = options.function
        Constants["low_pass_filter"] = options.low_pass_filter  # enforced low_pass_filter
        #Constants["debug"] = options.debug
        Constants["main_log_prefix"] = args[1]
        #Constants["importali3d"] = options.importali3d
        Constants["myid"] = myid
        Constants["main_node"] = main_node
        Constants["nproc"] = nproc
        Constants["log_main"] = log_main
        Constants["nxinit"] = options.nxinit
        Constants["unaccounted"] = options.unaccounted
        Constants["seed"] = options.seed
        Constants["smallest_group"] = options.smallest_group
        Constants["sausage"] = options.sausage
        Constants["chunkdir"] = options.chunkdir
        Constants["PWadjustment"] = options.PWadjustment
        Constants["upscale"] = options.upscale
        Constants["wn"] = options.wn
        Constants["3d-interpolation"] = options.interpolation
        Constants["protein_shape"] = options.protein_shape

        # Create and initialize Tracker dictionary with input options
        Tracker = {}
        Tracker["constants"] = Constants
        Tracker["maxit"] = Tracker["constants"]["maxit"]
        Tracker["radius"] = Tracker["constants"]["radius"]
        #Tracker["xr"] = ""
        #Tracker["yr"] = "-1"  # Do not change!
        #Tracker["ts"] = 1
        #Tracker["an"] = "-1"
        #Tracker["delta"] = "2.0"
        #Tracker["zoom"] = True
        #Tracker["nsoft"] = 0
        #Tracker["local"] = False
        #Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"]
        Tracker["upscale"] = Tracker["constants"]["upscale"]
        Tracker["applyctf"] = False  # Should the data be premultiplied by the CTF. Set to False for local continuous.
        #Tracker["refvol"] = None
        Tracker["nxinit"] = Tracker["constants"]["nxinit"]
        #Tracker["nxstep"] = 32
        Tracker["icurrentres"] = -1
        #Tracker["ireachedres"] = -1
        #Tracker["lowpass"] = 0.4
        #Tracker["falloff"] = 0.2
        #Tracker["inires"] = options.inires  # Now in A, convert to absolute before using
        Tracker["fuse_freq"] = 50  # Now in A, convert to absolute before using
        #Tracker["delpreviousmax"] = False
        #Tracker["anger"] = -1.0
        #Tracker["shifter"] = -1.0
        #Tracker["saturatecrit"] = 0.95
        #Tracker["pixercutoff"] = 2.0
        #Tracker["directory"] = ""
        #Tracker["previousoutputdir"] = ""
        #Tracker["eliminated-outliers"] = False
        #Tracker["mainiteration"] = 0
        #Tracker["movedback"] = False
        #Tracker["state"] = Tracker["constants"]["states"][0]
        #Tracker["global_resolution"] = 0.0
        Tracker["orgstack"] = orgstack

        # import from utilities
        from utilities import sample_down_1D_curve, get_initial_ID, remove_small_groups, print_upper_triangular_matrix, print_a_line_with_timestamp
        from utilities import print_dict, get_resolution_mrk01, partition_to_groups, partition_independent_runs, get_outliers
        from utilities import merge_groups, save_alist, margin_of_error, get_margin_of_error, do_two_way_comparison, select_two_runs, get_ali3d_params
        from utilities import counting_projections, unload_dict, load_dict, get_stat_proj, create_random_list, get_number_of_groups, recons_mref
        from utilities import apply_low_pass_filter, get_groups_from_partition, get_number_of_groups, get_complementary_elements_total, update_full_dict
        from utilities import count_chunk_members, set_filter_parameters_from_adjusted_fsc, adjust_fsc_down, get_two_chunks_from_stack

        # Get the pixel size; if none, set to 1.0, and the original image size
        from utilities import get_shrink_data_huang
        if (myid == main_node):
            line = strftime("%Y-%m-%d_%H:%M:%S", localtime()) + " =>"
            print(line + "Initialization of 3-D sorting")
            a = get_im(orgstack)
            nnxo = a.get_xsize()
            if (Tracker["nxinit"] > nnxo):
                ERROR("Image size less than minimum permitted %d" % Tracker["nxinit"], "sxsort3d.py", 1)
                nnxo = -1
            else:
                if Tracker["constants"]["CTF"]:
                    i = a.get_attr('ctf')
                    pixel_size = i.apix
                    fq = pixel_size / Tracker["fuse_freq"]
                else:
                    pixel_size = 1.0
                    # No pixel size, fusing computed as 5 Fourier pixels
                    fq = 5.0 / nnxo
                del a
        else:
            nnxo = 0
            fq = 0.0
            pixel_size = 1.0
        nnxo = bcast_number_to_all(nnxo, source_node=main_node)
        if (nnxo < 0):
            mpi_finalize()
            exit()
        pixel_size = bcast_number_to_all(pixel_size, source_node=main_node)
        fq = bcast_number_to_all(fq, source_node=main_node)
        if Tracker["constants"]["wn"] == 0:
            Tracker["constants"]["nnxo"] = nnxo
        else:
            Tracker["constants"]["nnxo"] = Tracker["constants"]["wn"]
            nnxo = Tracker["constants"]["nnxo"]
        Tracker["constants"]["pixel_size"] = pixel_size
        Tracker["fuse_freq"] = fq
        del fq, nnxo, pixel_size

        if (Tracker["constants"]["radius"] < 1):
            Tracker["constants"]["radius"] = Tracker["constants"]["nnxo"] // 2 - 2
        elif ((2 * Tracker["constants"]["radius"] + 2) > Tracker["constants"]["nnxo"]):
            ERROR("Particle radius set too large!", "sxsort3d.py", 1, myid)

        # Master directory
        if myid == main_node:
            if masterdir == "":
                timestring = strftime("_%d_%b_%Y_%H_%M_%S", localtime())
                masterdir = "master_sort3d" + timestring
            li = len(masterdir)
            cmd = "{} {}".format("mkdir", masterdir)
            os.system(cmd)
        else:
            li = 0
        li = mpi_bcast(li, 1, MPI_INT, main_node, MPI_COMM_WORLD)[0]
        if li > 0:
            masterdir = mpi_bcast(masterdir, li, MPI_CHAR, main_node, MPI_COMM_WORLD)
            import string
            masterdir = string.join(masterdir, "")
        if myid == main_node:
            print_dict(Tracker["constants"], "Permanent settings of 3-D sorting program")

        # create a vstack from input stack to the local stack in masterdir
        # stack name set to default
        Tracker["constants"]["stack"] = "bdb:" + masterdir + "/rdata"
        Tracker["constants"]["ali3d"] = os.path.join(masterdir, "ali3d_init.txt")
        Tracker["constants"]["ctf_params"] = os.path.join(masterdir, "ctf_params.txt")
        Tracker["constants"]["partstack"] = Tracker["constants"]["ali3d"]  # also serves for refinement
        if myid == main_node:
            total_stack = EMUtil.get_image_count(Tracker["orgstack"])
        else:
            total_stack = 0
        total_stack = bcast_number_to_all(total_stack, source_node=main_node)
        mpi_barrier(MPI_COMM_WORLD)
        from time import sleep
        while not os.path.exists(masterdir):
            print "Node ", myid, " waiting..."
            sleep(5)
        mpi_barrier(MPI_COMM_WORLD)
        if myid == main_node:
            log_main.add("Sphire sort3d ")
            log_main.add("the sort3d master directory is " + masterdir)

        # Initial data analysis and handle two chunk files
        from random import shuffle
        # make chunkdir dictionary for computing margin of error
        import user_functions
        user_func = user_functions.factory[Tracker["constants"]["user_func"]]
        chunk_dict = {}
        chunk_list = []
        if myid == main_node:
            chunk_one = read_text_file(os.path.join(Tracker["constants"]["chunkdir"], "chunk0.txt"))
            chunk_two = read_text_file(os.path.join(Tracker["constants"]["chunkdir"], "chunk1.txt"))
        else:
            chunk_one = 0
            chunk_two = 0
        chunk_one = wrap_mpi_bcast(chunk_one, main_node)
        chunk_two = wrap_mpi_bcast(chunk_two, main_node)
        mpi_barrier(MPI_COMM_WORLD)

        # Read/write bdb: data on main node
        if myid == main_node:
            if (orgstack[:4] == "bdb:"):
                cmd = "{} {} {}".format("e2bdb.py", orgstack, "--makevstack=" + Tracker["constants"]["stack"])
            else:
                cmd = "{} {} {}".format("sxcpy.py", orgstack, Tracker["constants"]["stack"])
            cmdexecute(cmd)
            cmd = "{} {} {}".format("sxheader.py --params=xform.projection", "--export=" + Tracker["constants"]["ali3d"], orgstack)
            cmdexecute(cmd)
            cmd = "{} {} {}".format("sxheader.py --params=ctf", "--export=" + Tracker["constants"]["ctf_params"], orgstack)
            cmdexecute(cmd)
        mpi_barrier(MPI_COMM_WORLD)

        Tracker["total_stack"] = total_stack
        Tracker["constants"]["total_stack"] = total_stack
        Tracker["shrinkage"] = float(Tracker["nxinit"]) / Tracker["constants"]["nnxo"]
        Tracker["radius"] = Tracker["constants"]["radius"] * Tracker["shrinkage"]
        if Tracker["constants"]["mask3D"]:
            Tracker["mask3D"] = os.path.join(masterdir, "smask.hdf")
        else:
            Tracker["mask3D"] = None
        if Tracker["constants"]["focus3Dmask"]:
            Tracker["focus3D"] = os.path.join(masterdir, "sfocus.hdf")
        else:
            Tracker["focus3D"] = None
        if myid == main_node:
            if Tracker["constants"]["mask3D"]:
                mask_3D = get_shrink_3dmask(Tracker["nxinit"], Tracker["constants"]["mask3D"])
                mask_3D.write_image(Tracker["mask3D"])
            if Tracker["constants"]["focus3Dmask"]:
                mask_3D = get_shrink_3dmask(Tracker["nxinit"], Tracker["constants"]["focus3Dmask"])
                st = Util.infomask(mask_3D, None, True)
                if (st[0] == 0.0):
                    ERROR("sxrsort3d", "incorrect focused mask, after binarize all values zero", 1)
                mask_3D.write_image(Tracker["focus3D"])
                del mask_3D
        if Tracker["constants"]["PWadjustment"] != '':
            PW_dict = {}
            nxinit_pwsp = sample_down_1D_curve(Tracker["constants"]["nxinit"], Tracker["constants"]["nnxo"], Tracker["constants"]["PWadjustment"])
            Tracker["nxinit_PW"] = os.path.join(masterdir, "spwp.txt")
            if myid == main_node:
                write_text_file(nxinit_pwsp, Tracker["nxinit_PW"])
            PW_dict[Tracker["constants"]["nnxo"]] = Tracker["constants"]["PWadjustment"]
            PW_dict[Tracker["constants"]["nxinit"]] = Tracker["nxinit_PW"]
            Tracker["PW_dict"] = PW_dict
        mpi_barrier(MPI_COMM_WORLD)

        # ----- From two chunks to FSC, and low pass filter -----
        for element in chunk_one:
            chunk_dict[element] = 0
        for element in chunk_two:
            chunk_dict[element] = 1
        chunk_list = [chunk_one, chunk_two]
        Tracker["chunk_dict"] = chunk_dict
        Tracker["P_chunk0"] = len(chunk_one) / float(total_stack)
        Tracker["P_chunk1"] = len(chunk_two) / float(total_stack)

        # create two volumes to estimate resolution
        if myid == main_node:
            for index in xrange(2):
                write_text_file(chunk_list[index], os.path.join(masterdir, "chunk%01d.txt" % index))
        mpi_barrier(MPI_COMM_WORLD)
        vols = []
        for index in xrange(2):
            data, old_shifts = get_shrink_data_huang(Tracker, Tracker["constants"]["nxinit"], os.path.join(masterdir, "chunk%01d.txt" % index), Tracker["constants"]["partstack"], myid, main_node, nproc, preshift=True)
            vol = recons3d_4nn_ctf_MPI(myid=myid, prjlist=data, symmetry=Tracker["constants"]["sym"], finfo=None)
            if myid == main_node:
                vol.write_image(os.path.join(masterdir, "vol%d.hdf" % index))
            vols.append(vol)
        mpi_barrier(MPI_COMM_WORLD)
        if myid == main_node:
            low_pass, falloff, currentres = get_resolution_mrk01(vols, Tracker["constants"]["radius"], Tracker["constants"]["nxinit"], masterdir, Tracker["mask3D"])
            if low_pass > Tracker["constants"]["low_pass_filter"]:
                low_pass = Tracker["constants"]["low_pass_filter"]
        else:
            low_pass = 0.0
            falloff = 0.0
            currentres = 0.0
        currentres = bcast_number_to_all(currentres, source_node=main_node)
        low_pass = bcast_number_to_all(low_pass, source_node=main_node)
        falloff = bcast_number_to_all(falloff, source_node=main_node)
        Tracker["currentres"] = currentres
        Tracker["falloff"] = falloff
        if Tracker["constants"]["low_pass_filter"] == -1.0:
            Tracker["low_pass_filter"] = min(.45, low_pass / Tracker["shrinkage"])  # no better than .45
        else:
            Tracker["low_pass_filter"] = min(.45, Tracker["constants"]["low_pass_filter"] / Tracker["shrinkage"])
        Tracker["lowpass"] = Tracker["low_pass_filter"]
        Tracker["falloff"] = .1
        Tracker["global_fsc"] = os.path.join(masterdir, "fsc.txt")

        if myid == main_node:
            log_main.add("The command-line inputs are as follows:")
            log_main.add("**********************************************************")
        for a in sys.argv:
            if myid == main_node: log_main.add(a)
        if myid == main_node:
            log_main.add("number of cpus used in this run is %d" % Tracker["constants"]["nproc"])
            log_main.add("**********************************************************")
        from filter import filt_tanl

        # START 3-D sorting
        if myid == main_node:
            log_main.add("----------3-D sorting program------- ")
            log_main.add("current resolution %6.3f for images of original size in terms of absolute frequency" % Tracker["currentres"])
            log_main.add("equivalent to %f Angstrom resolution" % (Tracker["constants"]["pixel_size"] / Tracker["currentres"] / Tracker["shrinkage"]))
            log_main.add("the user provided enforced low_pass_filter is %f" % Tracker["constants"]["low_pass_filter"])
            #log_main.add("equivalent to %f Angstrom resolution"%(Tracker["constants"]["pixel_size"]/Tracker["constants"]["low_pass_filter"]))
            for index in xrange(2):
                filt_tanl(get_im(os.path.join(masterdir, "vol%01d.hdf" % index)), Tracker["low_pass_filter"], Tracker["falloff"]).write_image(os.path.join(masterdir, "volf%01d.hdf" % index))
        mpi_barrier(MPI_COMM_WORLD)

        from utilities import get_input_from_string
        delta = get_input_from_string(Tracker["constants"]["delta"])
        delta = delta[0]
        from utilities import even_angles
        n_angles = even_angles(delta, 0, 180)
        this_ali3d = Tracker["constants"]["ali3d"]
        sampled = get_stat_proj(Tracker, delta, this_ali3d)
        if myid == main_node:
            nc = 0
            for a in sampled:
                if len(sampled[a]) > 0:
                    nc += 1
            log_main.add("total sampled direction %10d at angle step %6.3f" % (len(n_angles), delta))
            log_main.add("captured sampled directions %10d percentage covered by data %6.3f" % (nc, float(nc) / len(n_angles) * 100))
        number_of_images_per_group = Tracker["constants"]["number_of_images_per_group"]
        if myid == main_node:
            log_main.add("user provided number_of_images_per_group %d" % number_of_images_per_group)
        Tracker["number_of_images_per_group"] = number_of_images_per_group
        number_of_groups = get_number_of_groups(total_stack, number_of_images_per_group)
        Tracker["number_of_groups"] = number_of_groups

        generation = 0
        partition_dict = {}
        full_dict = {}
        workdir = os.path.join(masterdir, "generation%03d" % generation)
        Tracker["this_dir"] = workdir
        if myid == main_node:
            log_main.add("---- generation %5d" % generation)
            log_main.add("number of images per group is set as %d" % number_of_images_per_group)
            log_main.add("the initial number of groups is %10d " % number_of_groups)
            cmd = "{} {}".format("mkdir", workdir)
            os.system(cmd)
        mpi_barrier(MPI_COMM_WORLD)
        list_to_be_processed = range(Tracker["constants"]["total_stack"])
        Tracker["this_data_list"] = list_to_be_processed
        create_random_list(Tracker)

        full_dict = {}
        for iptl in xrange(Tracker["constants"]["total_stack"]):
            full_dict[iptl] = iptl
        Tracker["full_ID_dict"] = full_dict

        for indep_run in xrange(Tracker["constants"]["indep_runs"]):
            Tracker["this_particle_list"] = Tracker["this_indep_list"][indep_run]
            ref_vol = recons_mref(Tracker)
            if myid == main_node:
                log_main.add("independent run %10d" % indep_run)
            mpi_barrier(MPI_COMM_WORLD)
            Tracker["this_data_list"] = list_to_be_processed
            Tracker["total_stack"] = len(Tracker["this_data_list"])
            Tracker["this_particle_text_file"] = os.path.join(workdir, "independent_list_%03d.txt" % indep_run)  # for get_shrink_data
            if myid == main_node:
                write_text_file(Tracker["this_data_list"], Tracker["this_particle_text_file"])
            mpi_barrier(MPI_COMM_WORLD)
            outdir = os.path.join(workdir, "EQ_Kmeans%03d" % indep_run)
            ref_vol = apply_low_pass_filter(ref_vol, Tracker)
            mref_ali3d_EQ_Kmeans(ref_vol, outdir, Tracker["this_particle_text_file"], Tracker)
            partition_dict[indep_run] = Tracker["this_partition"]
        Tracker["partition_dict"] = partition_dict
        Tracker["total_stack"] = len(Tracker["this_data_list"])
        Tracker["this_total_stack"] = Tracker["total_stack"]

        do_two_way_comparison(Tracker)

        ref_vol_list = []
        from time import sleep
        number_of_ref_class = []
        for igrp in xrange(len(Tracker["two_way_stable_member"])):
            Tracker["this_data_list"] = Tracker["two_way_stable_member"][igrp]
            Tracker["this_data_list_file"] = os.path.join(workdir, "stable_class%d.txt" % igrp)
            if myid == main_node:
                write_text_file(Tracker["this_data_list"], Tracker["this_data_list_file"])
            data, old_shifts = get_shrink_data_huang(Tracker, Tracker["nxinit"], Tracker["this_data_list_file"], Tracker["constants"]["partstack"], myid, main_node, nproc, preshift=True)
            volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist=data, symmetry=Tracker["constants"]["sym"], finfo=None)
            ref_vol_list.append(volref)
            number_of_ref_class.append(len(Tracker["this_data_list"]))
            if myid == main_node:
                log_main.add("group %d members %d " % (igrp, len(Tracker["this_data_list"])))
        Tracker["number_of_ref_class"] = number_of_ref_class
        nx_of_image = ref_vol_list[0].get_xsize()
        if Tracker["constants"]["PWadjustment"]:
            Tracker["PWadjustment"] = Tracker["PW_dict"][nx_of_image]
        else:
            Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"]  # no PW adjustment
        if myid == main_node:
            for iref in xrange(len(ref_vol_list)):
                refdata = [None] * 4
                refdata[0] = ref_vol_list[iref]
                refdata[1] = Tracker
                refdata[2] = Tracker["constants"]["myid"]
                refdata[3] = Tracker["constants"]["nproc"]
                volref = user_func(refdata)
                volref.write_image(os.path.join(workdir, "volf_stable.hdf"), iref)
        mpi_barrier(MPI_COMM_WORLD)

        Tracker["this_data_list"] = Tracker["this_accounted_list"]
        outdir = os.path.join(workdir, "Kmref")
        empty_group, res_groups, final_list = ali3d_mref_Kmeans_MPI(ref_vol_list, outdir, Tracker["this_accounted_text"], Tracker)
        Tracker["this_unaccounted_list"] = get_complementary_elements(list_to_be_processed, final_list)
        if myid == main_node:
            log_main.add("the number of particles not processed is %d" % len(Tracker["this_unaccounted_list"]))
            write_text_file(Tracker["this_unaccounted_list"], Tracker["this_unaccounted_text"])
        update_full_dict(Tracker["this_unaccounted_list"], Tracker)

        number_of_groups = len(res_groups)
        vol_list = []
        number_of_ref_class = []
        for igrp in xrange(number_of_groups):
            data, old_shifts = get_shrink_data_huang(Tracker, Tracker["constants"]["nnxo"], os.path.join(outdir, "Class%d.txt" % igrp), Tracker["constants"]["partstack"], myid, main_node, nproc, preshift=True)
            volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist=data, symmetry=Tracker["constants"]["sym"], finfo=None)
            vol_list.append(volref)
            if (myid == main_node):
                npergroup = len(read_text_file(os.path.join(outdir, "Class%d.txt" % igrp)))
            else:
                npergroup = 0
            npergroup = bcast_number_to_all(npergroup, main_node)
            number_of_ref_class.append(npergroup)
        Tracker["number_of_ref_class"] = number_of_ref_class
        mpi_barrier(MPI_COMM_WORLD)
        nx_of_image = vol_list[0].get_xsize()
        if Tracker["constants"]["PWadjustment"]:
            Tracker["PWadjustment"] = Tracker["PW_dict"][nx_of_image]
        else:
            Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"]
        if myid == main_node:
            for ivol in xrange(len(vol_list)):
                refdata = [None] * 4
                refdata[0] = vol_list[ivol]
                refdata[1] = Tracker
                refdata[2] = Tracker["constants"]["myid"]
                refdata[3] = Tracker["constants"]["nproc"]
                volref = user_func(refdata)
                volref.write_image(os.path.join(workdir, "volf_of_Classes.hdf"), ivol)
            log_main.add("number of unaccounted particles %10d" % len(Tracker["this_unaccounted_list"]))
            log_main.add("number of accounted particles %10d" % len(Tracker["this_accounted_list"]))

        Tracker["this_data_list"] = Tracker["this_unaccounted_list"]  # reset parameters for the next round calculation
        Tracker["total_stack"] = len(Tracker["this_unaccounted_list"])
        Tracker["this_total_stack"] = Tracker["total_stack"]
        number_of_groups = get_number_of_groups(len(Tracker["this_unaccounted_list"]), number_of_images_per_group)
        Tracker["number_of_groups"] = number_of_groups

        while number_of_groups >= 2:
            generation += 1
            partition_dict = {}
            workdir = os.path.join(masterdir, "generation%03d" % generation)
            Tracker["this_dir"] = workdir
            if myid == main_node:
                log_main.add("*********************************************")
                log_main.add("----- generation %5d " % generation)
                log_main.add("number of images per group is set as %10d " % number_of_images_per_group)
                log_main.add("the number of groups is %10d " % number_of_groups)
                log_main.add(" number of particles for clustering is %10d" % Tracker["total_stack"])
                cmd = "{} {}".format("mkdir", workdir)
                os.system(cmd)
            mpi_barrier(MPI_COMM_WORLD)
            create_random_list(Tracker)
            for indep_run in xrange(Tracker["constants"]["indep_runs"]):
                Tracker["this_particle_list"] = Tracker["this_indep_list"][indep_run]
                ref_vol = recons_mref(Tracker)
                if myid == main_node:
                    log_main.add("independent run %10d" % indep_run)
                outdir = os.path.join(workdir, "EQ_Kmeans%03d" % indep_run)
                Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
                #ref_vol = apply_low_pass_filter(ref_vol, Tracker)
                mref_ali3d_EQ_Kmeans(ref_vol, outdir, Tracker["this_unaccounted_text"], Tracker)
                partition_dict[indep_run] = Tracker["this_partition"]
            Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
            Tracker["total_stack"] = len(Tracker["this_unaccounted_list"])
            Tracker["partition_dict"] = partition_dict
            Tracker["this_total_stack"] = Tracker["total_stack"]
            total_list_of_this_run = Tracker["this_unaccounted_list"]

            do_two_way_comparison(Tracker)

            ref_vol_list = []
            number_of_ref_class = []
            for igrp in xrange(len(Tracker["two_way_stable_member"])):
                Tracker["this_data_list"] = Tracker["two_way_stable_member"][igrp]
                Tracker["this_data_list_file"] = os.path.join(workdir, "stable_class%d.txt" % igrp)
                if myid == main_node:
                    write_text_file(Tracker["this_data_list"], Tracker["this_data_list_file"])
                mpi_barrier(MPI_COMM_WORLD)
                data, old_shifts = get_shrink_data_huang(Tracker, Tracker["constants"]["nxinit"], Tracker["this_data_list_file"], Tracker["constants"]["partstack"], myid, main_node, nproc, preshift=True)
                volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist=data, symmetry=Tracker["constants"]["sym"], finfo=None)
                #volref = filt_tanl(volref, Tracker["constants"]["low_pass_filter"], .1)
                if myid == main_node:
                    volref.write_image(os.path.join(workdir, "vol_stable.hdf"), igrp)  # write each stable-class volume at its group index
                #volref = resample(volref, Tracker["shrinkage"])
                ref_vol_list.append(volref)
                number_of_ref_class.append(len(Tracker["this_data_list"]))
                mpi_barrier(MPI_COMM_WORLD)
            Tracker["number_of_ref_class"] = number_of_ref_class

            Tracker["this_data_list"] = Tracker["this_accounted_list"]
            outdir = os.path.join(workdir, "Kmref")
            empty_group, res_groups, final_list = ali3d_mref_Kmeans_MPI(ref_vol_list, outdir, Tracker["this_accounted_text"], Tracker)
            # calculate the 3-D structure of original image size for each group
            number_of_groups = len(res_groups)
            Tracker["this_unaccounted_list"] = get_complementary_elements(total_list_of_this_run, final_list)
            if myid == main_node:
                log_main.add("the number of particles not processed is %d" % len(Tracker["this_unaccounted_list"]))
                write_text_file(Tracker["this_unaccounted_list"], Tracker["this_unaccounted_text"])
            mpi_barrier(MPI_COMM_WORLD)
            update_full_dict(Tracker["this_unaccounted_list"], Tracker)
            vol_list = []
            for igrp in xrange(number_of_groups):
                data, old_shifts = get_shrink_data_huang(Tracker, Tracker["constants"]["nnxo"], os.path.join(outdir, "Class%d.txt" % igrp), Tracker["constants"]["partstack"], myid, main_node, nproc, preshift=True)
                volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist=data, symmetry=Tracker["constants"]["sym"], finfo=None)
                vol_list.append(volref)
            mpi_barrier(MPI_COMM_WORLD)
            nx_of_image = ref_vol_list[0].get_xsize()
            if Tracker["constants"]["PWadjustment"]:
                Tracker["PWadjustment"] = Tracker["PW_dict"][nx_of_image]
            else:
                Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"]
            if myid == main_node:
                for ivol in xrange(len(vol_list)):
                    refdata = [None] * 4
                    refdata[0] = vol_list[ivol]
                    refdata[1] = Tracker
                    refdata[2] = Tracker["constants"]["myid"]
                    refdata[3] = Tracker["constants"]["nproc"]
                    volref = user_func(refdata)
                    volref.write_image(os.path.join(workdir, "volf_of_Classes.hdf"), ivol)
                log_main.add("number of unaccounted particles %10d" % len(Tracker["this_unaccounted_list"]))
                log_main.add("number of accounted particles %10d" % len(Tracker["this_accounted_list"]))
            del vol_list
            mpi_barrier(MPI_COMM_WORLD)
            number_of_groups = get_number_of_groups(len(Tracker["this_unaccounted_list"]), number_of_images_per_group)
            Tracker["number_of_groups"] = number_of_groups
            Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
            Tracker["total_stack"] = len(Tracker["this_unaccounted_list"])

        if Tracker["constants"]["unaccounted"]:
            data, old_shifts = get_shrink_data_huang(Tracker, Tracker["constants"]["nnxo"], Tracker["this_unaccounted_text"], Tracker["constants"]["partstack"], myid, main_node, nproc, preshift=True)
            volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist=data, symmetry=Tracker["constants"]["sym"], finfo=None)
            nx_of_image = volref.get_xsize()
            if Tracker["constants"]["PWadjustment"]:
                Tracker["PWadjustment"] = Tracker["PW_dict"][nx_of_image]
            else:
                Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"]
            if (myid == main_node):
                refdata = [None] * 4
                refdata[0] = volref
                refdata[1] = Tracker
                refdata[2] = Tracker["constants"]["myid"]
                refdata[3] = Tracker["constants"]["nproc"]
                volref = user_func(refdata)
                #volref = filt_tanl(volref, Tracker["constants"]["low_pass_filter"], .1)
                volref.write_image(os.path.join(workdir, "volf_unaccounted.hdf"))

        # Finish program
        if myid == main_node:
            log_main.add("sxsort3d finishes")
        mpi_barrier(MPI_COMM_WORLD)
        from mpi import mpi_finalize
        mpi_finalize()
        exit()
def main():
    from logger import Logger, BaseLogger_Files
    import user_functions
    from optparse import OptionParser, SUPPRESS_HELP
    from global_def import SPARXVERSION
    from EMAN2 import EMData

    main_node = 0
    mpi_init(0, [])
    mpi_comm = MPI_COMM_WORLD
    myid = mpi_comm_rank(MPI_COMM_WORLD)
    mpi_size = mpi_comm_size(MPI_COMM_WORLD)  # Total number of processes, passed by --np option.

    progname = os.path.basename(sys.argv[0])
    usage = progname + " stack [output_directory] --ir=inner_radius --radius=outer_radius --rs=ring_step --xr=x_range --yr=y_range --ts=translational_search_step --delta=angular_step --an=angular_neighborhood --center=center_type --maxit1=max_iter1 --maxit2=max_iter2 --L2threshold=0.1 --fl --aa --ref_a=S --sym=c1"
    usage += """

stack			2D images in a stack file: (default required string)
output_directory	directory name into which the output files will be written. If it does not exist, the directory will be created. If it does exist, the program will continue executing from where it stopped (if it did not already reach the end). The "--use_latest_master_directory" option can be used to choose the most recent directory that starts with "master".
"""
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option("--radius", type="int", help="radius of the particle: has to be less than < int(nx/2)-1 (default required int)")
    parser.add_option("--ir", type="int", default=1, help="inner radius for rotational search: > 0 (default 1)")
    parser.add_option("--rs", type="int", default=1, help="step between rings in rotational search: >0 (default 1)")
    parser.add_option("--xr", type="string", default='0', help="range for translation search in x direction: search is +/-xr in pixels (default '0')")
    parser.add_option("--yr", type="string", default='0', help="range for translation search in y direction: if omitted will be set to xr, search is +/-yr in pixels (default '0')")
    parser.add_option("--ts", type="string", default='1.0', help="step size of the translation search in x-y directions: search is -xr, -xr+ts, 0, xr-ts, xr, can be fractional (default '1.0')")
    parser.add_option("--delta", type="string", default='2.0', help="angular step of reference projections: (default '2.0')")
    #parser.add_option("--an", type="string", default="-1", help="angular neighborhood for local searches (phi and theta)")
    parser.add_option("--center", type="float", default=-1.0, help="centering of 3D template: average shift method; 0: no centering; 1: center of gravity (default -1.0)")
    parser.add_option("--maxit1", type="int", default=400, help="maximum number of iterations performed for the GA part: (default 400)")
    parser.add_option("--maxit2", type="int", default=50, help="maximum number of iterations performed for the finishing up part: (default 50)")
    parser.add_option("--L2threshold", type="float", default=0.03, help="stopping criterion of GA: given as a maximum relative dispersion of volumes' L2 norms: (default 0.03)")
    parser.add_option("--doga", type="float", default=0.1, help="do GA when fraction of orientation changes less than 1.0 degrees is at least doga: (default 0.1)")
    parser.add_option("--n_shc_runs", type="int", default=4, help="number of quasi-independent shc runs (same as '--nruns' parameter from sxviper.py): (default 4)")
    parser.add_option("--n_rv_runs", type="int", default=10, help="number of rviper iterations: (default 10)")
    parser.add_option("--n_v_runs", type="int", default=3, help="number of viper runs for each r_viper cycle: (default 3)")
    parser.add_option("--outlier_percentile", type="float", default=95.0, help="percentile above which outliers are removed every rviper iteration: (default 95.0)")
    parser.add_option("--iteration_start", type="int", default=0, help="starting iteration for rviper: 0 means go to the most recent one (default 0)")
    #parser.add_option("--CTF", action="store_true", default=False, help="NOT IMPLEMENTED Consider CTF correction during the alignment")
    #parser.add_option("--snr", type="float", default=1.0, help="Signal-to-Noise Ratio of the data (default 1.0)")
    parser.add_option("--ref_a", type="string", default='S', help="method for generating the quasi-uniformly distributed projection directions: (default S)")
    parser.add_option("--sym", type="string", default='c1', help="point-group symmetry of the structure: (default c1)")
    # parser.add_option("--function", type="string", default="ref_ali3d", help="name of the reference preparation function (ref_ali3d by default)")
    ##### XXXXXXXXXXXXXXXXXXXXXX option does not exist in docs XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    parser.add_option("--function", type="string", default="ref_ali3d", help=SUPPRESS_HELP)
    parser.add_option("--npad", type="int", default=2, help="padding size for 3D reconstruction: (default 2)")
    # options introduced for the do_volume function
    parser.add_option("--fl", type="float", default=0.25, help="cut-off frequency applied to the template volume: using a hyperbolic tangent low-pass filter (default 0.25)")
    parser.add_option("--aa", type="float", default=0.1, help="fall-off of hyperbolic tangent low-pass filter: (default 0.1)")
    parser.add_option("--pwreference", type="string", default='', help="text file with a reference power spectrum: (default none)")
    parser.add_option("--mask3D", type="string", default=None, help="3D mask file: (default sphere)")
    parser.add_option("--moon_elimination", type="string", default='', help="elimination of disconnected pieces: two arguments: mass in KDa and pixel size in px/A separated by comma, no space (default none)")
    # used for debugging, help is suppressed with SUPPRESS_HELP
    ##### XXXXXXXXXXXXXXXXXXXXXX option does not exist in docs XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    parser.add_option("--my_random_seed", type="int", default=123, help=SUPPRESS_HELP)
    ##### XXXXXXXXXXXXXXXXXXXXXX option does not exist in docs XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    parser.add_option("--run_get_already_processed_viper_runs", action="store_true", dest="run_get_already_processed_viper_runs", default=False, help=SUPPRESS_HELP)
    ##### XXXXXXXXXXXXXXXXXXXXXX option does not exist in docs XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
    parser.add_option("--use_latest_master_directory", action="store_true", dest="use_latest_master_directory", default=False, help=SUPPRESS_HELP)
    parser.add_option("--criterion_name", type="string", default='80th percentile', help="criterion deciding if volumes have a core set of stable projections: '80th percentile', other options: 'fastest increase in the last quartile' (default '80th percentile')")
    parser.add_option("--outlier_index_threshold_method", type="string", default='discontinuity_in_derivative', help="method that decides which images to keep: discontinuity_in_derivative, other options: percentile, angle_measure (default discontinuity_in_derivative)")
    parser.add_option("--angle_threshold", type="int", default=30, help="angle threshold for projection removal if using 'angle_measure': (default 30)")

    required_option_list = ['radius']
    (options, args) = parser.parse_args(sys.argv[1:])
    options.CTF = False
    options.snr = 1.0
    options.an = -1

    if options.moon_elimination == "":
        options.moon_elimination = []
    else:
        options.moon_elimination = map(float, options.moon_elimination.split(","))

    # Making sure all required options appeared.
    for required_option in required_option_list:
        if not options.__dict__[required_option]:
            print "\n ==%s== mandatory option is missing.\n" % required_option
            print "Please run '" + progname + " -h' for detailed options"
            return 1

    mpi_barrier(MPI_COMM_WORLD)
    if (myid == main_node):
        print "****************************************************************"
        Util.version()
        print "****************************************************************"
        sys.stdout.flush()
    mpi_barrier(MPI_COMM_WORLD)

    # this is just for benefiting from a user friendly parameter name
    options.ou = options.radius
    my_random_seed = options.my_random_seed
    criterion_name = options.criterion_name
    outlier_index_threshold_method = options.outlier_index_threshold_method
    use_latest_master_directory = options.use_latest_master_directory
    iteration_start_default = options.iteration_start
    number_of_rrr_viper_runs = options.n_rv_runs
    no_of_viper_runs_analyzed_together_from_user_options = options.n_v_runs
    no_of_shc_runs_analyzed_together = options.n_shc_runs
    outlier_percentile = options.outlier_percentile
    angle_threshold = options.angle_threshold

    run_get_already_processed_viper_runs = options.run_get_already_processed_viper_runs
    get_already_processed_viper_runs(run_get_already_processed_viper_runs)

    import random
    random.seed(my_random_seed)

    if len(args) < 1 or len(args) > 3:
        print "usage: " + usage
        print "Please run '" + progname + " -h' for detailed options"
        return 1

    # if len(args) > 2:
    # 	ref_vol = get_im(args[2])
    # else:
    ref_vol = None

    # error_status = None
    # if myid == 0:
    # 	number_of_images = EMUtil.get_image_count(args[0])
    # 	if mpi_size > number_of_images:
    # 		error_status = ('Number of processes supplied by --np in mpirun needs to be less than or equal to %d (total number of images) ' % number_of_images, getframeinfo(currentframe()))
    # if_error_then_all_processes_exit_program(error_status)

    bdb_stack_location = ""
    masterdir = ""
    if len(args) == 2:
        masterdir = args[1]
        if masterdir[-1] != DIR_DELIM:
            masterdir += DIR_DELIM
    elif len(args) == 1:
        if use_latest_master_directory:
            all_dirs = [d for d in os.listdir(".") if os.path.isdir(d)]
            import re
            r = re.compile("^master.*$")
            all_dirs = filter(r.match, all_dirs)
            if len(all_dirs) > 0:
                # all_dirs = max(all_dirs, key=os.path.getctime)
                masterdir = max(all_dirs, key=os.path.getmtime)
                masterdir += DIR_DELIM

    log = Logger(BaseLogger_Files())

    error_status = 0
    if mpi_size % no_of_shc_runs_analyzed_together != 0:
        ERROR('Number of processes needs to be a multiple of the number of quasi-independent runs (shc) within each viper run. '
        'Total quasi-independent runs by default are 3, you can change it by specifying '
        '--n_shc_runs option (in sxviper this option is called --nruns). Also, to improve communication time it is recommended that '
        'the number of processes divided by the number of quasi-independent runs is a power '
        'of 2 (e.g. 2, 4, 8 or 16 depending on how many physical cores each node has).', 'sxviper', 1)
        error_status = 1
    if_error_then_all_processes_exit_program(error_status)

    # Create folder for all results or check if there is one created already
    if (myid == main_node):
        #cmd = "{}".format("Rmycounter ccc")
        #cmdexecute(cmd)
        if (masterdir == ""):
            timestring = strftime("%Y_%m_%d__%H_%M_%S" + DIR_DELIM, localtime())
            masterdir = "master" + timestring
        if not os.path.exists(masterdir):
            cmd = "{} {}".format("mkdir", masterdir)
            cmdexecute(cmd)
        if ':' in args[0]:
            bdb_stack_location = args[0].split(":")[0] + ":" + masterdir + args[0].split(":")[1]
            org_stack_location = args[0]
            if (not os.path.exists(os.path.join(masterdir, "EMAN2DB" + DIR_DELIM))):
                # cmd = "{} {}".format("cp -rp EMAN2DB", masterdir, "EMAN2DB" DIR_DELIM)
                # cmdexecute(cmd)
                cmd = "{} {} {}".format("e2bdb.py", org_stack_location, "--makevstack=" + bdb_stack_location + "_000")
                cmdexecute(cmd)
                from applications import header
                try:
                    header(bdb_stack_location + "_000", params='original_image_index', fprint=True)
                    print "Images were already indexed!"
                except KeyError:
                    print "Indexing images"
                    header(bdb_stack_location + "_000", params='original_image_index', consecutive=True)
        else:
            filename = os.path.basename(args[0])
            bdb_stack_location = "bdb:" + masterdir + os.path.splitext(filename)[0]
            if (not os.path.exists(os.path.join(masterdir, "EMAN2DB" + DIR_DELIM))):
                cmd = "{} {} {}".format("sxcpy.py ", args[0], bdb_stack_location + "_000")
                cmdexecute(cmd)
                from applications import header
                try:
                    header(bdb_stack_location + "_000", params='original_image_index', fprint=True)
                    print "Images were already indexed!"
                except KeyError:
                    print "Indexing images"
                    header(bdb_stack_location + "_000", params='original_image_index', consecutive=True)

    # send masterdir to all processes
    dir_len = len(masterdir) * int(myid == main_node)
    dir_len = mpi_bcast(dir_len, 1, MPI_INT, 0, MPI_COMM_WORLD)[0]
    masterdir = mpi_bcast(masterdir, dir_len, MPI_CHAR, main_node, MPI_COMM_WORLD)
    masterdir = string.join(masterdir, "")
    if masterdir[-1] != DIR_DELIM:
        masterdir += DIR_DELIM

    global_def.LOGFILE = os.path.join(masterdir, global_def.LOGFILE)
    print_program_start_information()

    # send bdb_stack_location to all processes
    dir_len = len(bdb_stack_location) * int(myid == main_node)
    dir_len = mpi_bcast(dir_len, 1, MPI_INT, 0, MPI_COMM_WORLD)[0]
    bdb_stack_location = mpi_bcast(bdb_stack_location, dir_len, MPI_CHAR, main_node, MPI_COMM_WORLD)
    bdb_stack_location = string.join(bdb_stack_location, "")

    iteration_start = get_latest_directory_increment_value(masterdir, "main")

    if (myid == main_node):
        if (iteration_start < iteration_start_default):
            ERROR('Starting iteration provided is greater than last iteration performed. Quitting program', 'sxviper', 1)
            error_status = 1
    if iteration_start_default != 0:
        iteration_start = iteration_start_default
    if (myid == main_node):
        if (number_of_rrr_viper_runs < iteration_start):
            ERROR('Please provide number of rviper runs (--n_rv_runs) greater than number of iterations already performed.', 'sxviper', 1)
            error_status = 1
    if_error_then_all_processes_exit_program(error_status)

    for rviper_iter in range(iteration_start, number_of_rrr_viper_runs + 1):
        if (myid == main_node):
            all_projs = EMData.read_images(bdb_stack_location + "_%03d" % (rviper_iter - 1))
            print "XXXXXXXXXXXXXXXXX"
            print "Number of projections (in loop): " + str(len(all_projs))
            print "XXXXXXXXXXXXXXXXX"
            subset = range(len(all_projs))
        else:
            all_projs = None
            subset = None

        runs_iter = get_latest_directory_increment_value(masterdir + NAME_OF_MAIN_DIR + "%03d" % rviper_iter, DIR_DELIM + NAME_OF_RUN_DIR, start_value=0) - 1
        no_of_viper_runs_analyzed_together = max(runs_iter + 2, no_of_viper_runs_analyzed_together_from_user_options)

        first_time_entering_the_loop_need_to_do_full_check_up = True
        while True:
            runs_iter += 1
            if not first_time_entering_the_loop_need_to_do_full_check_up:
                if runs_iter >= no_of_viper_runs_analyzed_together:
                    break
            first_time_entering_the_loop_need_to_do_full_check_up = False

            this_run_is_NOT_complete = 0
            if (myid == main_node):
                independent_run_dir = masterdir + DIR_DELIM + NAME_OF_MAIN_DIR + ('%03d' + DIR_DELIM + NAME_OF_RUN_DIR + "%03d" + DIR_DELIM) % (rviper_iter, runs_iter)
                if run_get_already_processed_viper_runs:
                    cmd = "{} {}".format("mkdir -p", masterdir + DIR_DELIM + NAME_OF_MAIN_DIR + ('%03d' + DIR_DELIM) % (rviper_iter)); cmdexecute(cmd)
                    cmd = "{} {}".format("rm -rf", independent_run_dir); cmdexecute(cmd)
                    cmd = "{} {}".format("cp -r", get_already_processed_viper_runs() + " " + independent_run_dir); cmdexecute(cmd)
                if os.path.exists(independent_run_dir + "log.txt") and (string_found_in_file("Finish VIPER2", independent_run_dir + "log.txt")):
                    this_run_is_NOT_complete = 0
                else:
                    this_run_is_NOT_complete = 1
                    cmd = "{} {}".format("rm -rf", independent_run_dir); cmdexecute(cmd)
                    cmd = "{} {}".format("mkdir -p", independent_run_dir); cmdexecute(cmd)
                this_run_is_NOT_complete = mpi_bcast(this_run_is_NOT_complete, 1, MPI_INT, main_node, MPI_COMM_WORLD)[0]
                dir_len = len(independent_run_dir)
                dir_len = mpi_bcast(dir_len, 1, MPI_INT, main_node, MPI_COMM_WORLD)[0]
                independent_run_dir = mpi_bcast(independent_run_dir, dir_len, MPI_CHAR, main_node, MPI_COMM_WORLD)
                independent_run_dir = string.join(independent_run_dir, "")
            else:
                this_run_is_NOT_complete = mpi_bcast(this_run_is_NOT_complete, 1, MPI_INT, main_node, MPI_COMM_WORLD)[0]
                dir_len = 0
                independent_run_dir = ""
                dir_len = mpi_bcast(dir_len, 1, MPI_INT, main_node, MPI_COMM_WORLD)[0]
                independent_run_dir = mpi_bcast(independent_run_dir, dir_len, MPI_CHAR, main_node, MPI_COMM_WORLD)
                independent_run_dir = string.join(independent_run_dir, "")

            if this_run_is_NOT_complete:
                mpi_barrier(MPI_COMM_WORLD)
                if independent_run_dir[-1] != DIR_DELIM:
                    independent_run_dir += DIR_DELIM
                log.prefix = independent_run_dir

                options.user_func = user_functions.factory[options.function]

                # for debugging purposes
                #if (myid == main_node):
                #	cmd = "{} {}".format("cp ~/log.txt ", independent_run_dir)
                #	cmdexecute(cmd)
                #	cmd = "{} {}{}".format("cp ~/paramdir/params$(mycounter ccc).txt ", independent_run_dir, "params.txt")
                #	cmdexecute(cmd)

                if (myid == main_node):
                    store_value_of_simple_vars_in_json_file(masterdir + 'program_state_stack.json', locals(), exclude_list_of_vars=["usage"], vars_that_will_show_only_size=["subset"])
                    store_value_of_simple_vars_in_json_file(masterdir + 'program_state_stack.json', options.__dict__, write_or_append='a')

                out_params, out_vol, out_peaks = multi_shc(all_projs, subset, no_of_shc_runs_analyzed_together, options, mpi_comm=mpi_comm, log=log, ref_vol=ref_vol)
            # end of: if this_run_is_NOT_complete

            if runs_iter >= (no_of_viper_runs_analyzed_together_from_user_options - 1):
                increment_for_current_iteration = identify_outliers(myid, main_node, rviper_iter, no_of_viper_runs_analyzed_together, no_of_viper_runs_analyzed_together_from_user_options, masterdir, bdb_stack_location, outlier_percentile, criterion_name, outlier_index_threshold_method, angle_threshold)
                if increment_for_current_iteration == MUST_END_PROGRAM_THIS_ITERATION:
                    break
                no_of_viper_runs_analyzed_together += increment_for_current_iteration
        # end of independent viper loop

        calculate_volumes_after_rotation_and_save_them(options, rviper_iter, masterdir, bdb_stack_location, myid, mpi_size, no_of_viper_runs_analyzed_together, no_of_viper_runs_analyzed_together_from_user_options)

        if increment_for_current_iteration == MUST_END_PROGRAM_THIS_ITERATION:
            if (myid == main_node):
                print "RVIPER found a core set of stable projections for the current RVIPER iteration (%d), the maximum angle difference between corresponding projections from different VIPER volumes is less than %.2f. Finishing." % (rviper_iter, ANGLE_ERROR_THRESHOLD)
            break
    else:
        if (myid == main_node):
            print "After running the last iteration (%d), RVIPER did not find a set of projections with the maximum angle difference between corresponding projections from different VIPER volumes less than %.2f. Finishing." % (rviper_iter, ANGLE_ERROR_THRESHOLD)
    # end of RVIPER loop

    mpi_barrier(MPI_COMM_WORLD)
    mpi_finalize()
def cml_find_structure2(Prj, Ori, Rot, outdir, outname, maxit, first_zero, flag_weights, myid, main_node, number_of_proc):
    from projection import cml_export_progress, cml_disc, cml_export_txtagls
    import time, sys
    from random import shuffle, random
    from mpi import MPI_FLOAT, MPI_INT, MPI_SUM, MPI_COMM_WORLD
    from mpi import mpi_reduce, mpi_bcast, mpi_barrier

    # global vars (g_n_psi added to the list: it is used in the psi spin below)
    global g_i_prj, g_n_prj, g_n_psi, g_n_anglst, g_anglst, g_d_psi, g_debug, g_n_lines, g_seq

    # list of free orientations
    ocp = [-1] * g_n_anglst
    if first_zero:
        listprj = range(1, g_n_prj)
        ocp[0] = 0
    else:
        listprj = range(g_n_prj)

    # to stop when the solution oscillates
    period_disc = [0, 0, 0]
    period_ct = 0
    period_th = 2
    #if not flag_weights: weights = [1.0] * g_n_lines

    # iteration loop
    for ite in xrange(maxit):
        #print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ite = ", ite, " myid = ", myid
        t_start = time.time()

        # loop over i prj
        change = False
        tlistprj = listprj[:]
        shuffle(tlistprj)
        nnn = len(tlistprj)
        tlistprj = mpi_bcast(tlistprj, nnn, MPI_INT, main_node, MPI_COMM_WORLD)
        tlistprj = map(int, tlistprj)
        """
        if(ite > 1 and ite % 5 == 0 and ite < 140):
            if(myid == main_node):
                for i in xrange(0, len(tlistprj), 5):
                    ind = 4*i
                    Ori[ind]   = 360.*random()
                    Ori[ind+1] = 180.*random()
                    Ori[ind+2] = 360.*random()
                    Ori[ind+3] = -1
                for i in xrange(len(tlistprj)):
                    ind = 4*i
                    Ori[ind+3] = float(Ori[ind+3])
            nnn = len(Ori)
            Ori = mpi_bcast(Ori, nnn, MPI_FLOAT, main_node, MPI_COMM_WORLD)
            Ori = map(float, Ori)
            for i in xrange(len(tlistprj)):
                ind = 4*i
                Ori[ind+3] = int(Ori[ind+3])
        """
        for iprj in tlistprj:
            #print "********************************** iprj = ", iprj, g_n_anglst
            # store the current orientation
            ind = 4*iprj
            store_phi   = Ori[ind]
            store_theta = Ori[ind+1]
            store_psi   = Ori[ind+2]
            cur_agl     = Ori[ind+3]
            if cur_agl != -1: ocp[cur_agl] = -1

            # prepare active index of cml for weighting in order to earn time later
            iw = [0] * (g_n_prj - 1)
            c  = 0
            ct = 0
            for i in xrange(g_n_prj):
                for j in xrange(i+1, g_n_prj):
                    if i == iprj or j == iprj:
                        iw[ct] = c
                        ct += 1
                    c += 1

            # loop over all angles; each process scores every number_of_proc-th angle
            best_disc_list = [0] * g_n_anglst
            best_psi_list  = [0] * g_n_anglst
            for iagl in xrange(myid, g_n_anglst, number_of_proc):
                # if orientation is free
                if ocp[iagl] == -1:
                    # assign new orientation
                    Ori[ind]   = g_anglst[iagl][0]
                    Ori[ind+1] = g_anglst[iagl][1]
                    Rot = Util.cml_update_rot(Rot, iprj, Ori[ind], Ori[ind+1], 0.0)
                    # weights
                    if flag_weights:
                        cml = Util.cml_line_in3d(Ori, g_seq, g_n_prj, g_n_lines)
                        weights = Util.cml_weights(cml)
                        mw = max(weights)
                        for i in xrange(g_n_lines): weights[i] = mw - weights[i]
                        sw = sum(weights)
                        if sw == 0:
                            weights = [6.28 / float(g_n_lines)] * g_n_lines
                        else:
                            for i in xrange(g_n_lines):
                                weights[i] /= sw
                                weights[i] *= weights[i]
                    # spin all psi
                    com = Util.cml_line_insino(Rot, iprj, g_n_prj)
                    if flag_weights:
                        res = Util.cml_spin_psi(Prj, com, weights, iprj, iw, g_n_psi, g_d_psi, g_n_prj)
                    else:
                        res = Util.cml_spin_psi_now(Prj, com, iprj, iw, g_n_psi, g_d_psi, g_n_prj)
                    # select the best
                    best_disc_list[iagl] = res[0]
                    best_psi_list[iagl]  = res[1]
                    if g_debug: cml_export_progress(outdir, ite, iprj, iagl, res[1], res[0], 'progress')
                else:
                    if g_debug: cml_export_progress(outdir, ite, iprj, iagl, -1, -1, 'progress')
            # each angle slot was written by exactly one process, so a SUM
            # reduction reassembles the full lists on the main node
            best_disc_list = mpi_reduce(best_disc_list, g_n_anglst, MPI_FLOAT, MPI_SUM, main_node, MPI_COMM_WORLD)
            best_psi_list  = mpi_reduce(best_psi_list,  g_n_anglst, MPI_FLOAT, MPI_SUM, main_node, MPI_COMM_WORLD)

            best_psi  = -1
            best_iagl = -1
            best_disc = -1  # initialized on every rank so the debug export below is always defined
            if myid == main_node:
                best_disc = 1.0e20
                for iagl in xrange(g_n_anglst):
                    if best_disc_list[iagl] > 0.0 and best_disc_list[iagl] < best_disc:
                        best_disc = best_disc_list[iagl]
                        best_psi  = best_psi_list[iagl]
                        best_iagl = iagl
            best_psi  = mpi_bcast(best_psi, 1, MPI_FLOAT, main_node, MPI_COMM_WORLD)
            best_iagl = mpi_bcast(best_iagl, 1, MPI_INT, main_node, MPI_COMM_WORLD)
            best_psi  = float(best_psi[0])
            best_iagl = int(best_iagl[0])
            #print "xxxxx myid = ", myid, " best_psi = ", best_psi, " best_ialg = ", best_iagl

            # if the best angle changed, assign the new orientation, otherwise restore the stored one
            if best_iagl != cur_agl:
                ocp[best_iagl] = iprj
                Ori[ind]   = g_anglst[best_iagl][0]  # phi
                Ori[ind+1] = g_anglst[best_iagl][1]  # theta
                Ori[ind+2] = best_psi * g_d_psi      # psi
                Ori[ind+3] = best_iagl               # index
                change = True
            else:
                if cur_agl != -1: ocp[cur_agl] = iprj
                Ori[ind]   = store_phi
                Ori[ind+1] = store_theta
                Ori[ind+2] = store_psi
                Ori[ind+3] = cur_agl

            Rot = Util.cml_update_rot(Rot, iprj, Ori[ind], Ori[ind+1], Ori[ind+2])

            if g_debug: cml_export_progress(outdir, ite, iprj, best_iagl, best_psi * g_d_psi, best_disc, 'choose')

        # if anything changed, compute the new full discrepancy
        disc = cml_disc(Prj, Ori, Rot, flag_weights)

        # display in the progress file
        if myid == main_node:
            cml_export_txtagls(outdir, outname, Ori, disc, 'Ite: %03i' % (ite + 1))

        if not change: break

        # to stop when the solution oscillates
        period_disc.pop(0)
        period_disc.append(disc)
        if period_disc[0] == period_disc[2]:
            period_ct += 1
            if period_ct >= period_th and min(period_disc) == disc and myid == main_node:
                angfile = open(outdir + '/' + outname, 'a')
                angfile.write('\nSTOP SOLUTION UNSTABLE\n')
                angfile.write('Discrepancy period: %s\n' % period_disc)
                angfile.close()
                break
        else:
            period_ct = 0
        mpi_barrier(MPI_COMM_WORLD)

    return Ori, disc, ite
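# Illustrative sketch (not part of the original module): cml_find_structure2
# splits the candidate angles across ranks with a stride and reassembles the
# per-angle scores with an MPI_SUM reduction, which works because every slot
# is written by exactly one rank while all others leave it at zero. A minimal
# standalone version of that pattern, assuming the same mpi wrapper used
# above; score_fn is a hypothetical per-angle scoring callable:
def strided_scores_demo(score_fn, n_angles, myid, nproc, main_node=0):
    from mpi import mpi_reduce, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD
    scores = [0.0] * n_angles
    for iagl in xrange(myid, n_angles, nproc):
        scores[iagl] = score_fn(iagl)  # only this rank fills slot iagl
    # summing over ranks reassembles the full list on main_node
    scores = mpi_reduce(scores, n_angles, MPI_FLOAT, MPI_SUM, main_node, MPI_COMM_WORLD)
    if myid == main_node:
        return map(float, scores)
    return None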
def compare(compare_ref_free, outfile_repro, ref_free_output, yrng, xrng, rstep, nx, apix, ref_free_cutoff, nproc, myid, main_node):
    from alignment    import Numrinit, ringwe, Applyws
    from random       import seed, randint
    from utilities    import get_params2D, set_params2D, model_circle, inverse_transform2, combine_params2
    from fundamentals import rot_shift2D
    from mpi          import MPI_COMM_WORLD, mpi_barrier, mpi_bcast, MPI_INT
    from statistics   import fsc_mask
    from filter       import fit_tanh
    from numpy        import array

    fout    = "%s.hdf" % ref_free_output
    frc_out = "%s_frc" % ref_free_output
    res_out = "%s_res" % ref_free_output

    nima = EMUtil.get_image_count(compare_ref_free)
    image_start, image_end = MPI_start_end(nima, nproc, myid)
    ima = EMData()
    ima.read_image(compare_ref_free, image_start)

    last_ring  = nx/2 - 2
    first_ring = 1
    mask = model_circle(last_ring, nx, nx)

    refi = []
    numref = EMUtil.get_image_count(outfile_repro)
    cnx = nx/2 + 1
    cny = cnx

    mode = "F"
    numr = Numrinit(first_ring, last_ring, rstep, mode)
    wr = ringwe(numr, mode)

    ima.to_zero()
    for j in xrange(numref):
        temp = EMData()
        temp.read_image(outfile_repro, j)
        # even, odd, number of even, number of images. After frc, totav
        refi.append(temp)

    # for each node read its share of data
    data = EMData.read_images(compare_ref_free, range(image_start, image_end))
    for im in xrange(image_start, image_end):
        data[im-image_start].set_attr('ID', im)
        set_params2D(data[im-image_start], [0, 0, 0, 0, 1])

    ringref = []
    for j in xrange(numref):
        refi[j].process_inplace("normalize.mask", {"mask":mask, "no_sigma":1})  # normalize reference images to N(0,1)
        cimage = Util.Polar2Dm(refi[j], cnx, cny, numr, mode)
        Util.Frngs(cimage, numr)
        Applyws(cimage, numr, wr)
        ringref.append(cimage)

    if myid == main_node:
        seed(1000)
    data_shift = []
    frc = []
    res = []
    for im in xrange(image_start, image_end):
        alpha, sx, sy, mirror, scale = get_params2D(data[im-image_start])
        alphai, sxi, syi, scalei = inverse_transform2(alpha, sx, sy, 1.0)
        # normalize
        data[im-image_start].process_inplace("normalize.mask", {"mask":mask, "no_sigma":1})  # subtract average under the mask
        # align the current image to the references
        [angt, sxst, syst, mirrort, xiref, peakt] = Util.multiref_polar_ali_2d(data[im-image_start], ringref, xrng, yrng, 1, mode, numr, cnx+sxi, cny+syi)
        iref = int(xiref)
        [alphan, sxn, syn, mn] = combine_params2(0.0, -sxi, -syi, 0, angt, sxst, syst, int(mirrort))
        set_params2D(data[im-image_start], [alphan, sxn, syn, int(mn), scale])
        temp = rot_shift2D(data[im-image_start], alphan, sxn, syn, mn)
        temp.set_attr('assign', iref)
        tfrc = fsc_mask(temp, refi[iref], mask = mask)
        temp.set_attr('frc', tfrc[1])
        res = fit_tanh(tfrc)
        temp.set_attr('res', res)
        data_shift.append(temp)

    # write aligned images interleaved with their assigned references;
    # under MPI the writing has to be done one node at a time
    for node in xrange(nproc):
        if myid == node:
            for image in data_shift:
                image.write_image(fout, -1)
                refindex = image.get_attr('assign')
                refi[refindex].write_image(fout, -1)
        mpi_barrier(MPI_COMM_WORLD)

    rejects = []
    if myid == main_node:
        a = EMData()
        index = 0
        frc = []
        res = []
        temp = []
        classes = []
        for im in xrange(nima):
            a.read_image(fout, index)
            frc.append(a.get_attr("frc"))
            if ref_free_cutoff != -1:
                classes.append(a.get_attr("class_ptcl_idxs"))
            tmp = a.get_attr("res")
            temp.append(tmp[0])
            res.append("%12f" % (apix/tmp[0]))
            res.append("\n")
            index = index + 2
        res_num = array(temp)
        mean_score = res_num.mean(axis=0)
        std_score = res_num.std(axis=0)
        std = std_score / 2
        if ref_free_cutoff != -1:
            cutoff = mean_score - std * ref_free_cutoff
            reject = res_num < cutoff
            index = 0
            for i in reject:
                if i:
                    rejects.extend(classes[index])
                index = index + 1
            rejects.sort()
        del a
        fout_frc = open(frc_out, 'w')
        fout_res = open(res_out, 'w')
        fout_res.write("".join(res))
        temp = zip(*frc)
        datstrings = []
        for i in temp:
            for j in i:
                datstrings.append(" %12f" % (j))
            datstrings.append("\n")
        fout_frc.write("".join(datstrings))
        fout_frc.close()
        fout_res.close()  # added: the res file was never closed

    # broadcast the reject list; moved out of the main_node block because the
    # collective calls must be executed by every process, and the broadcast
    # length must be converted to a scalar before it is used as a count
    length = mpi_bcast(len(rejects), 1, MPI_INT, main_node, MPI_COMM_WORLD)
    length = int(length[0])
    if myid != main_node:
        rejects = [-1] * length
    rejects = mpi_bcast(rejects, length, MPI_INT, main_node, MPI_COMM_WORLD)
    rejects = map(int, rejects)

    del refi
    del ringref
    return rejects
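# Illustrative sketch (not part of the original module): compare() shares a
# variable-length reject list using the usual two-step pattern -- broadcast
# the length first so receivers can size a buffer, then broadcast the
# payload. A minimal version of that pattern; bcast_int_list is a
# hypothetical name:
def bcast_int_list(lst, myid, main_node=0):
    from mpi import mpi_bcast, MPI_INT, MPI_COMM_WORLD
    length = mpi_bcast(len(lst), 1, MPI_INT, main_node, MPI_COMM_WORLD)
    length = int(length[0])
    if myid != main_node:
        lst = [-1] * length  # receivers size their buffer before the payload arrives
    lst = mpi_bcast(lst, length, MPI_INT, main_node, MPI_COMM_WORLD)
    return map(int, lst)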
def ali3d_MPI(stack, ref_vol, outdir, maskfile = None, ir = 1, ou = -1, rs = 1,
              xr = "4 2 2 1", yr = "-1", ts = "1 1 0.5 0.25", delta = "10 6 4 4", an = "-1",
              center = 0, maxit = 5, term = 95, CTF = False, fourvar = False, snr = 1.0, ref_a = "S", sym = "c1",
              sort = True, cutoff = 999.99, pix_cutoff = "0", two_tail = False, model_jump = "1 1 1 1 1",
              restart = False, save_half = False, protos = None, oplane = None, lmask = -1, ilmask = -1,
              findseam = False, vertstep = None, hpars = "-1", hsearch = "73.0 170.0", full_output = False,
              compare_repro = False, compare_ref_free = "-1", ref_free_cutoff = "-1 -1 -1 -1",
              wcmask = None, debug = False, recon_pad = 4):
    from alignment    import Numrinit, prepare_refrings
    from utilities    import model_circle, get_image, drop_image, get_input_from_string
    from utilities    import bcast_list_to_all, bcast_number_to_all, reduce_EMData_to_root, bcast_EMData_to_all
    from utilities    import send_attr_dict
    from utilities    import get_params_proj, file_type
    from fundamentals import rot_avg_image
    import os
    import sys   # added: sys.exit is used below
    import types
    from utilities    import print_begin_msg, print_end_msg, print_msg
    from mpi          import mpi_bcast, mpi_comm_size, mpi_comm_rank, MPI_FLOAT, MPI_COMM_WORLD, mpi_barrier
    from mpi          import mpi_reduce, MPI_INT, MPI_SUM, mpi_finalize
    from filter       import filt_ctf
    from projection   import prep_vol, prgs
    from statistics   import hist_list, varf3d_MPI, fsc_mask
    from numpy        import array, bincount, array2string, ones

    number_of_proc = mpi_comm_size(MPI_COMM_WORLD)
    myid           = mpi_comm_rank(MPI_COMM_WORLD)
    main_node = 0
    if myid == main_node:
        if os.path.exists(outdir):
            ERROR('Output directory exists, please change the name and restart the program', "ali3d_MPI", 1)
        os.mkdir(outdir)
    mpi_barrier(MPI_COMM_WORLD)

    if debug:
        from time import sleep
        while not os.path.exists(outdir):
            print "Node ", myid, " waiting..."
            sleep(5)
        info_file = os.path.join(outdir, "progress%04d"%myid)
        finfo = open(info_file, 'w')
    else:
        finfo = None

    mjump = get_input_from_string(model_jump)
    xrng  = get_input_from_string(xr)
    if yr == "-1": yrng = xrng
    else:          yrng = get_input_from_string(yr)
    step  = get_input_from_string(ts)
    delta = get_input_from_string(delta)
    ref_free_cutoff = get_input_from_string(ref_free_cutoff)
    pix_cutoff = get_input_from_string(pix_cutoff)

    lstp = min(len(xrng), len(yrng), len(step), len(delta))
    if an == "-1":
        an = [-1] * lstp
    else:
        an = get_input_from_string(an)
    # make sure pix_cutoff is set for all iterations
    if len(pix_cutoff) < lstp:
        for i in xrange(len(pix_cutoff), lstp):
            pix_cutoff.append(pix_cutoff[-1])
    # don't waste time on sub-pixel alignment for low-resolution ang incr
    for i in range(len(step)):
        if (delta[i] > 4 or delta[i] == -1) and step[i] < 1:
            step[i] = 1

    first_ring = int(ir)
    rstep      = int(rs)
    last_ring  = int(ou)
    max_iter   = int(maxit)
    center     = int(center)

    nrefs  = EMUtil.get_image_count(ref_vol)
    nmasks = 0
    if maskfile:
        # read number of masks within each maskfile (mc)
        nmasks = EMUtil.get_image_count(maskfile)
        # open masks within maskfile (mc)
        maskF = EMData.read_images(maskfile, xrange(nmasks))
    vol = EMData.read_images(ref_vol, xrange(nrefs))
    nx  = vol[0].get_xsize()

    ## make sure box sizes are the same
    if myid == main_node:
        im = EMData.read_images(stack, [0])
        bx = im[0].get_xsize()
        if bx != nx:
            print_msg("Error: Stack box size (%i) differs from initial model (%i)\n"%(bx, nx))
            sys.exit()
        del im, bx

    # for helical processing:
    helicalrecon = False
    if protos is not None or hpars != "-1" or findseam is True:
        helicalrecon = True
        # if no out-of-plane param set, use 5 degrees
        if oplane is None:
            oplane = 5.0
    if protos is not None:
        proto = get_input_from_string(protos)
        if len(proto) != nrefs:
            print_msg("Error: insufficient protofilament numbers supplied")
            sys.exit()
    if hpars != "-1":
        hpars = get_input_from_string(hpars)
        if len(hpars) != 2*nrefs:
            print_msg("Error: insufficient helical parameters supplied")
            sys.exit()
    ## create helical parameter file for helical reconstruction
    if helicalrecon is True and myid == main_node:
        from hfunctions import createHpar
        # create initial helical parameter files
        dp    = [0]*nrefs
        dphi  = [0]*nrefs
        vdp   = [0]*nrefs
        vdphi = [0]*nrefs
        for iref in xrange(nrefs):
            hpar = os.path.join(outdir, "hpar%02d.spi"%(iref))
            params = False
            if hpars != "-1":
                # if helical parameters explicitly given, set twist & rise
                params = [float(hpars[iref*2]), float(hpars[(iref*2)+1])]
            dp[iref], dphi[iref], vdp[iref], vdphi[iref] = createHpar(hpar, proto[iref], params, vertstep)

    # get values for helical search parameters
    hsearch = get_input_from_string(hsearch)
    if len(hsearch) != 2:
        print_msg("Error: specify outer and inner radii for helical search")
        sys.exit()

    if last_ring < 0 or last_ring > int(nx/2)-2: last_ring = int(nx/2) - 2

    if myid == main_node:
        # import user_functions
        # user_func = user_functions.factory[user_func_name]
        print_begin_msg("ali3d_MPI")
        print_msg("Input stack                 : %s\n"%(stack))
        print_msg("Reference volume            : %s\n"%(ref_vol))
        print_msg("Output directory            : %s\n"%(outdir))
        if nmasks > 0:
            print_msg("Maskfile (number of masks)  : %s (%i)\n"%(maskfile, nmasks))
        print_msg("Inner radius                : %i\n"%(first_ring))
        print_msg("Outer radius                : %i\n"%(last_ring))
        print_msg("Ring step                   : %i\n"%(rstep))
        print_msg("X search range              : %s\n"%(xrng))
        print_msg("Y search range              : %s\n"%(yrng))
        print_msg("Translational step          : %s\n"%(step))
        print_msg("Angular step                : %s\n"%(delta))
        print_msg("Angular search range        : %s\n"%(an))
        print_msg("Maximum iteration           : %i\n"%(max_iter))
        print_msg("Center type                 : %i\n"%(center))
        print_msg("CTF correction              : %s\n"%(CTF))
        print_msg("Signal-to-Noise Ratio       : %f\n"%(snr))
        print_msg("Reference projection method : %s\n"%(ref_a))
        print_msg("Symmetry group              : %s\n"%(sym))
        print_msg("Fourier padding for 3D      : %i\n"%(recon_pad))
        print_msg("Number of reference models  : %i\n"%(nrefs))
        print_msg("Sort images between models  : %s\n"%(sort))
        print_msg("Allow images to jump        : %s\n"%(mjump))
        print_msg("CC cutoff standard dev      : %f\n"%(cutoff))
        print_msg("Two tail cutoff             : %s\n"%(two_tail))
        print_msg("Termination pix error       : %f\n"%(term))
        print_msg("Pixel error cutoff          : %s\n"%(pix_cutoff))
        print_msg("Restart                     : %s\n"%(restart))
        print_msg("Full output                 : %s\n"%(full_output))
        print_msg("Compare reprojections       : %s\n"%(compare_repro))
        print_msg("Compare ref free class avgs : %s\n"%(compare_ref_free))
        print_msg("Use cutoff from ref free    : %s\n"%(ref_free_cutoff))
        if protos:
            print_msg("Protofilament numbers       : %s\n"%(proto))
            print_msg("Using helical search range  : %s\n"%hsearch)
        if findseam is True:
            print_msg("Using seam-based reconstruction\n")
        if hpars != "-1":
            print_msg("Using hpars                 : %s\n"%hpars)
        if vertstep != None:
            print_msg("Using vertical step         : %.2f\n"%vertstep)
        if save_half is True:
            print_msg("Saving even/odd halves\n")
        for i in xrange(100): print_msg("*")
        print_msg("\n\n")

    if maskfile:
        if type(maskfile) is types.StringType: mask3D = get_image(maskfile)
        else:                                  mask3D = maskfile
    else:
        mask3D = model_circle(last_ring, nx, nx, nx)

    numr   = Numrinit(first_ring, last_ring, rstep, "F")
    mask2D = model_circle(last_ring, nx, nx) - model_circle(first_ring, nx, nx)

    fscmask = model_circle(last_ring, nx, nx, nx)
    if CTF:
        from filter import filt_ctf
    from reconstruction_rjh import rec3D_MPI_noCTF

    if myid == main_node:
        active = EMUtil.get_all_attributes(stack, 'active')
        list_of_particles = []
        for im in xrange(len(active)):
            if active[im]: list_of_particles.append(im)
        del active
        nima = len(list_of_particles)
    else:
        nima = 0
    total_nima = bcast_number_to_all(nima, source_node = main_node)

    if myid != main_node:
        list_of_particles = [-1]*total_nima
    list_of_particles = bcast_list_to_all(list_of_particles, source_node = main_node)

    image_start, image_end = MPI_start_end(total_nima, number_of_proc, myid)

    # create a list of images for each node
    list_of_particles = list_of_particles[image_start: image_end]
    nima = len(list_of_particles)
    if debug:
        finfo.write("image_start, image_end: %d %d\n" %(image_start, image_end))
        finfo.flush()

    data = EMData.read_images(stack, list_of_particles)

    t_zero = Transform({"type":"spider", "phi":0, "theta":0, "psi":0, "tx":0, "ty":0})
    transmulti = [[t_zero for i in xrange(nrefs)] for j in xrange(nima)]

    for iref, im in ((iref, im) for iref in xrange(nrefs) for im in xrange(nima)):
        if nrefs == 1:
            transmulti[im][iref] = data[im].get_attr("xform.projection")
        else:
            # if multi models, keep track of eulers for all models
            try:
                transmulti[im][iref] = data[im].get_attr("eulers_txty.%i"%iref)
            except:
                data[im].set_attr("eulers_txty.%i"%iref, t_zero)

    scoremulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)]
    pixelmulti = [[0.0 for i in xrange(nrefs)] for j in xrange(nima)]
    ref_res = [0.0 for x in xrange(nrefs)]
    apix = data[0].get_attr('apix_x')

    # for oplane parameter, create cylindrical mask
    if oplane is not None and myid == main_node:
        from hfunctions import createCylMask
        cmaskf = os.path.join(outdir, "mask3D_cyl.mrc")
        mask3D = createCylMask(data, ou, lmask, ilmask, cmaskf)
    # if finding seam of helix, create wedge masks
    if findseam is True:
        wedgemask = []
        for pf in xrange(nrefs):
            wedgemask.append(EMData())
    # wedgemask option
    if wcmask is not None:
        wcmask = get_input_from_string(wcmask)
        if len(wcmask) != 3:
            print_msg("Error: wcmask option requires 3 values: x y radius")
            sys.exit()

    # determine if particles have helix info:
    try:
        data[0].get_attr('h_angle')
        original_data = []
        boxmask = True
        from hfunctions import createBoxMask
    except:
        boxmask = False

    # prepare particles
    for im in xrange(nima):
        data[im].set_attr('ID', list_of_particles[im])
        data[im].set_attr('pix_score', int(0))
        if CTF:
            # only phaseflip particles, not full CTF correction
            ctf_params = data[im].get_attr("ctf")
            st = Util.infomask(data[im], mask2D, False)
            data[im] -= st[0]
            data[im] = filt_ctf(data[im], ctf_params, sign = -1, binary = 1)
            data[im].set_attr('ctf_applied', 1)
        # for window mask:
        if boxmask is True:
            h_angle = data[im].get_attr("h_angle")
            original_data.append(data[im].copy())
            bmask = createBoxMask(nx, apix, ou, lmask, h_angle)
            data[im] *= bmask
            del bmask

    if debug:
        finfo.write('%d loaded \n' % nima)
        finfo.flush()
    if myid == main_node:
        # initialize data for the reference preparation function;
        # for method -1, switch off centering in user function
        ref_data = [mask3D, max(center, 0), None, None, None, None]

    from time import time

    # this is needed for gathering of pixel errors
    disps = []
    recvcount = []
    disps_score = []
    recvcount_score = []
    for im in xrange(number_of_proc):
        if im == main_node:
            disps.append(0)
            disps_score.append(0)
        else:
            disps.append(disps[im-1] + recvcount[im-1])
            disps_score.append(disps_score[im-1] + recvcount_score[im-1])
        ib, ie = MPI_start_end(total_nima, number_of_proc, im)
        recvcount.append(ie - ib)
        recvcount_score.append((ie-ib)*nrefs)

    pixer = [0.0]*nima
    cs = [0.0]*3
    total_iter = 0
    volodd = EMData.read_images(ref_vol, xrange(nrefs))
    voleve = EMData.read_images(ref_vol, xrange(nrefs))

    if restart:
        # recreate initial volumes from alignments stored in header
        itout = "000_00"
        for iref in xrange(nrefs):
            if nrefs == 1:
                modout = ""
            else:
                modout = "_model_%02d"%(iref)

            if sort:
                group = iref
                for im in xrange(nima):
                    imgroup = data[im].get_attr('group')
                    if imgroup == iref:
                        data[im].set_attr('xform.projection', transmulti[im][iref])
            else:
                group = int(999)
                for im in xrange(nima):
                    data[im].set_attr('xform.projection', transmulti[im][iref])

            fscfile = os.path.join(outdir, "fsc_%s%s"%(itout, modout))

            vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(data, sym, fscmask, fscfile, myid, main_node, index = group, npad = recon_pad)

            if myid == main_node:
                if helicalrecon:
                    from hfunctions import processHelicalVol
                    vstep = None
                    if vertstep is not None:
                        vstep = (vdp[iref], vdphi[iref])
                    print_msg("Old rise and twist for model %i : %8.3f, %8.3f\n"%(iref, dp[iref], dphi[iref]))
                    hvals = processHelicalVol(vol[iref], voleve[iref], volodd[iref], iref, outdir, itout,
                                              dp[iref], dphi[iref], apix, hsearch, findseam, vstep, wcmask)
                    (vol[iref], voleve[iref], volodd[iref], dp[iref], dphi[iref], vdp[iref], vdphi[iref]) = hvals
                    print_msg("New rise and twist for model %i : %8.3f, %8.3f\n"%(iref, dp[iref], dphi[iref]))
                    # get new FSC from symmetrized half volumes
                    fscc = fsc_mask(volodd[iref], voleve[iref], mask3D, rstep, fscfile)
                else:
                    vol[iref].write_image(os.path.join(outdir, "vol_%s.hdf"%itout), -1)

                if save_half is True:
                    volodd[iref].write_image(os.path.join(outdir, "volodd_%s.hdf"%itout), -1)
                    voleve[iref].write_image(os.path.join(outdir, "voleve_%s.hdf"%itout), -1)

                if nmasks > 1:
                    # Read mask for multiplying
                    ref_data[0] = maskF[iref]
                ref_data[2] = vol[iref]
                ref_data[3] = fscc
                # call user-supplied function to prepare reference image, i.e., center and filter it
                vol[iref], cs, fl = ref_ali3d(ref_data)
                vol[iref].write_image(os.path.join(outdir, "volf_%s.hdf"%(itout)), -1)
                if apix == 1:
                    res_msg = "Models filtered at spatial frequency of:\t"
                    res = fl
                else:
                    res_msg = "Models filtered at resolution of:       \t"
                    res = apix / fl
                ares = array2string(array(res), precision = 2)
                print_msg("%s%s\n\n"%(res_msg, ares))

            bcast_EMData_to_all(vol[iref], myid, main_node)
            # write out headers, under MPI writing has to be done sequentially
            mpi_barrier(MPI_COMM_WORLD)

    # projection matching
    for N_step in xrange(lstp):
        terminate = 0
        Iter = -1
        while Iter < max_iter-1 and terminate == 0:
            Iter += 1
            total_iter += 1
            itout = "%03g_%02d" %(delta[N_step], Iter)
            if myid == main_node:
                print_msg("ITERATION #%3d, inner iteration #%3d\nDelta = %4.1f, an = %5.2f, xrange = %5.2f, yrange = %5.2f, step = %5.2f\n\n"%(N_step, Iter, delta[N_step], an[N_step], xrng[N_step], yrng[N_step], step[N_step]))

            for iref in xrange(nrefs):
                if myid == main_node: start_time = time()
                volft, kb = prep_vol(vol[iref])

                ## constrain projections to out of plane parameter
                theta1 = None
                theta2 = None
                if oplane is not None:
                    theta1 = 90 - oplane
                    theta2 = 90 + oplane
                refrings = prepare_refrings(volft, kb, nx, delta[N_step], ref_a, sym, numr, MPI = True, phiEqpsi = "Minus", initial_theta = theta1, delta_theta = theta2)
                del volft, kb

                if myid == main_node:
                    print_msg("Time to prepare projections for model %i: %s\n" % (iref, legibleTime(time()-start_time)))
                    start_time = time()

                for im in xrange(nima):
                    data[im].set_attr("xform.projection", transmulti[im][iref])
                    if an[N_step] == -1:
                        t1, peak, pixer[im] = proj_ali_incore(data[im], refrings, numr, xrng[N_step], yrng[N_step], step[N_step], finfo)
                    else:
                        t1, peak, pixer[im] = proj_ali_incore_local(data[im], refrings, numr, xrng[N_step], yrng[N_step], step[N_step], an[N_step], finfo)
                    #data[im].set_attr("xform.projection"%iref, t1)
                    if nrefs > 1: data[im].set_attr("eulers_txty.%i"%iref, t1)
                    scoremulti[im][iref] = peak
                    from pixel_error import max_3D_pixel_error
                    # t1 is the current param, t2 is old
                    t2 = transmulti[im][iref]
                    pixelmulti[im][iref] = max_3D_pixel_error(t1, t2, numr[-3])
                    transmulti[im][iref] = t1

                if myid == main_node:
                    print_msg("Time of alignment for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
                    start_time = time()

            # gather scoring data from all processors
            from mpi import mpi_gatherv
            scoremultisend = sum(scoremulti, [])
            pixelmultisend = sum(pixelmulti, [])
            tmp  = mpi_gatherv(scoremultisend, len(scoremultisend), MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node, MPI_COMM_WORLD)
            tmp1 = mpi_gatherv(pixelmultisend, len(pixelmultisend), MPI_FLOAT, recvcount_score, disps_score, MPI_FLOAT, main_node, MPI_COMM_WORLD)
            tmp  = mpi_bcast(tmp,  (total_nima * nrefs), MPI_FLOAT, 0, MPI_COMM_WORLD)
            tmp1 = mpi_bcast(tmp1, (total_nima * nrefs), MPI_FLOAT, 0, MPI_COMM_WORLD)
            tmp  = map(float, tmp)
            tmp1 = map(float, tmp1)
            score       = array(tmp).reshape(-1, nrefs)
            pixelerror  = array(tmp1).reshape(-1, nrefs)
            score_local = array(scoremulti)
            mean_score = score.mean(axis=0)
            std_score  = score.std(axis=0)
            cut  = mean_score - (cutoff * std_score)
            cut2 = mean_score + (cutoff * std_score)
            res_max = score_local.argmax(axis=1)
            minus_cc  = [0.0 for x in xrange(nrefs)]
            minus_pix = [0.0 for x in xrange(nrefs)]
            minus_ref = [0.0 for x in xrange(nrefs)]

            # output pixel errors
            if myid == main_node:
                from statistics import hist_list
                lhist = 20
                pixmin = pixelerror.min(axis=1)
                region, histo = hist_list(pixmin, lhist)
                if region[0] < 0.0: region[0] = 0.0
                print_msg("Histogram of pixel errors\n      ERROR       number of particles\n")
                for lhx in xrange(lhist):
                    print_msg(" %10.3f     %7d\n"%(region[lhx], histo[lhx]))
                # terminate if "term" percent of particles are within 1 pixel error
                im = 0
                for lhx in xrange(lhist):
                    if region[lhx] > 1.0: break
                    im += histo[lhx]
                print_msg("Percent of particles with pixel error < 1: %f\n\n"% (im/float(total_nima)*100))
                term_cond = float(term)/100
                if im/float(total_nima) > term_cond:
                    terminate = 1
                    print_msg("Terminating internal loop\n")
                del region, histo
            terminate = mpi_bcast(terminate, 1, MPI_INT, 0, MPI_COMM_WORLD)
            terminate = int(terminate[0])

            for im in xrange(nima):
                if sort == False:
                    data[im].set_attr('group', 999)
                elif mjump[N_step] == 1:
                    data[im].set_attr('group', int(res_max[im]))
                pix_run = data[im].get_attr('pix_score')
                if pix_cutoff[N_step] == 1 and (terminate == 1 or Iter == max_iter-1):
                    if pixelmulti[im][int(res_max[im])] > 1:
                        data[im].set_attr('pix_score', int(777))
                if (score_local[im][int(res_max[im])] < cut[int(res_max[im])]) or (two_tail and score_local[im][int(res_max[im])] > cut2[int(res_max[im])]):
                    data[im].set_attr('group', int(888))
                    minus_cc[int(res_max[im])] = minus_cc[int(res_max[im])] + 1
                if pix_run == 777:
                    data[im].set_attr('group', int(777))
                    minus_pix[int(res_max[im])] = minus_pix[int(res_max[im])] + 1
                if (compare_ref_free != "-1") and (ref_free_cutoff[N_step] != -1) and (total_iter > 1):
                    id = data[im].get_attr('ID')
                    if id in rejects:
                        data[im].set_attr('group', int(666))
                        minus_ref[int(res_max[im])] = minus_ref[int(res_max[im])] + 1

            minus_cc_tot  = mpi_reduce(minus_cc,  nrefs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)
            minus_pix_tot = mpi_reduce(minus_pix, nrefs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)
            minus_ref_tot = mpi_reduce(minus_ref, nrefs, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)
            if myid == main_node:
                if sort:
                    tot_max = score.argmax(axis=1)
                    res = bincount(tot_max)
                else:
                    res = ones(nrefs) * total_nima
                print_msg("Particle distribution:                 \t\t%s\n"%(res*1.0))
                afcut1 = res - minus_cc_tot
                afcut2 = afcut1 - minus_pix_tot
                afcut3 = afcut2 - minus_ref_tot
                print_msg("Particle distribution after cc cutoff: \t\t%s\n"%(afcut1))
                print_msg("Particle distribution after pix cutoff:\t\t%s\n"%(afcut2))
                print_msg("Particle distribution after ref cutoff:\t\t%s\n\n"%(afcut3))

            res = [0.0 for i in xrange(nrefs)]
            for iref in xrange(nrefs):
                if center == -1:
                    from utilities import estimate_3D_center_MPI, rotate_3D_shift
                    dummy = EMData()
                    cs[0], cs[1], cs[2], dummy, dummy = estimate_3D_center_MPI(data, total_nima, myid, number_of_proc, main_node)
                    cs = mpi_bcast(cs, 3, MPI_FLOAT, main_node, MPI_COMM_WORLD)
                    cs = [-float(cs[0]), -float(cs[1]), -float(cs[2])]
                    rotate_3D_shift(data, cs)

                if sort:
                    group = iref
                    for im in xrange(nima):
                        imgroup = data[im].get_attr('group')
                        if imgroup == iref:
                            data[im].set_attr('xform.projection', transmulti[im][iref])
                else:
                    group = int(999)
                    for im in xrange(nima):
                        data[im].set_attr('xform.projection', transmulti[im][iref])

                if nrefs == 1:
                    modout = ""
                else:
                    modout = "_model_%02d"%(iref)

                fscfile = os.path.join(outdir, "fsc_%s%s"%(itout, modout))
                vol[iref], fscc, volodd[iref], voleve[iref] = rec3D_MPI_noCTF(data, sym, fscmask, fscfile, myid, main_node, index = group, npad = recon_pad)

                if myid == main_node:
                    print_msg("3D reconstruction time for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
                    start_time = time()

                # Compute Fourier variance
                if fourvar:
                    outvar = os.path.join(outdir, "volVar_%s.hdf"%(itout))
                    ssnr_file = os.path.join(outdir, "ssnr_%s"%(itout))
                    varf = varf3d_MPI(data, ssnr_text_file = ssnr_file, mask2D = None, reference_structure = vol[iref], ou = last_ring, rw = 1.0, npad = 1, CTF = None, sign = 1, sym = sym, myid = myid)
                    if myid == main_node:
                        print_msg("Time to calculate 3D Fourier variance for model %i: %s\n"%(iref, legibleTime(time()-start_time)))
                        start_time = time()
                        varf = 1.0/varf
                        varf.write_image(outvar, -1)
                else:
                    varf = None

                if myid == main_node:
                    if helicalrecon:
                        from hfunctions import processHelicalVol
                        vstep = None
                        if vertstep is not None:
                            vstep = (vdp[iref], vdphi[iref])
                        print_msg("Old rise and twist for model %i : %8.3f, %8.3f\n"%(iref, dp[iref], dphi[iref]))
                        hvals = processHelicalVol(vol[iref], voleve[iref], volodd[iref], iref, outdir, itout,
                                                  dp[iref], dphi[iref], apix, hsearch, findseam, vstep, wcmask)
                        (vol[iref], voleve[iref], volodd[iref], dp[iref], dphi[iref], vdp[iref], vdphi[iref]) = hvals
                        print_msg("New rise and twist for model %i : %8.3f, %8.3f\n"%(iref, dp[iref], dphi[iref]))
                        # get new FSC from symmetrized half volumes
                        fscc = fsc_mask(volodd[iref], voleve[iref], mask3D, rstep, fscfile)
                        print_msg("Time to search and apply helical symmetry for model %i: %s\n\n"%(iref, legibleTime(time()-start_time)))
                        start_time = time()
                    else:
                        vol[iref].write_image(os.path.join(outdir, "vol_%s.hdf"%(itout)), -1)

                    if save_half is True:
                        volodd[iref].write_image(os.path.join(outdir, "volodd_%s.hdf"%(itout)), -1)
                        voleve[iref].write_image(os.path.join(outdir, "voleve_%s.hdf"%(itout)), -1)

                    if nmasks > 1:
                        # Read mask for multiplying
                        ref_data[0] = maskF[iref]
                    ref_data[2] = vol[iref]
                    ref_data[3] = fscc
                    ref_data[4] = varf
                    # call user-supplied function to prepare reference image, i.e., center and filter it
                    vol[iref], cs, fl = ref_ali3d(ref_data)
                    vol[iref].write_image(os.path.join(outdir, "volf_%s.hdf"%(itout)), -1)
                    if apix == 1:
                        res_msg = "Models filtered at spatial frequency of:\t"
                        res[iref] = fl
                    else:
                        res_msg = "Models filtered at resolution of:       \t"
                        res[iref] = apix / fl

                del varf
                bcast_EMData_to_all(vol[iref], myid, main_node)

                if compare_ref_free != "-1": compare_repro = True
                if compare_repro:
                    outfile_repro = comp_rep(refrings, data, itout, modout, vol[iref], group, nima, nx, myid, main_node, outdir)
                    mpi_barrier(MPI_COMM_WORLD)
                    if compare_ref_free != "-1":
                        ref_free_output = os.path.join(outdir, "ref_free_%s%s"%(itout, modout))
                        rejects = compare(compare_ref_free, outfile_repro, ref_free_output, yrng[N_step], xrng[N_step], rstep, nx, apix, ref_free_cutoff[N_step], number_of_proc, myid, main_node)

            # retrieve alignment params from all processors
            par_str = ['xform.projection', 'ID', 'group']
            if nrefs > 1:
                for iref in xrange(nrefs):
                    par_str.append('eulers_txty.%i'%iref)

            if myid == main_node:
                from utilities import recv_attr_dict
                recv_attr_dict(main_node, stack, data, par_str, image_start, image_end, number_of_proc)
            else:
                send_attr_dict(main_node, data, par_str, image_start, image_end)

            if myid == main_node:
                ares = array2string(array(res), precision = 2)
                print_msg("%s%s\n\n"%(res_msg, ares))
                dummy = EMData()
                if full_output:
                    nimat = EMUtil.get_image_count(stack)
                    output_file = os.path.join(outdir, "paramout_%s"%itout)
                    foutput = open(output_file, 'w')
                    for im in xrange(nimat):
                        # save the parameters for each of the models
                        outstring = ""
                        dummy.read_image(stack, im, True)
                        param3d = dummy.get_attr('xform.projection')
                        g = dummy.get_attr("group")
                        # retrieve alignments in EMAN-format
                        pE = param3d.get_params('eman')
                        outstring += "%f\t%f\t%f\t%f\t%f\t%i\n" %(pE["az"], pE["alt"], pE["phi"], pE["tx"], pE["ty"], g)
                        foutput.write(outstring)
                    foutput.close()
                del dummy
            mpi_barrier(MPI_COMM_WORLD)

    # mpi_finalize()
    if myid == main_node: print_end_msg("ali3d_MPI")
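# Illustrative sketch (not part of the original module): ali3d_MPI gathers
# per-particle scores with mpi_gatherv, which needs a receive count and a
# displacement per rank. Both follow directly from the module's MPI_start_end
# helper; a minimal reconstruction of that bookkeeping (gatherv_layout is a
# hypothetical name):
def gatherv_layout(total_nima, number_of_proc, nrefs=1):
    recvcount = []
    disps = []
    for im in xrange(number_of_proc):
        ib, ie = MPI_start_end(total_nima, number_of_proc, im)
        # each rank's block starts where the previous rank's block ended
        disps.append(sum(recvcount))
        recvcount.append((ie - ib) * nrefs)
    return recvcount, disps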
def program_state_stack(full_current_state, frame_info, file_name_of_saved_state=None, last_call="", force_starting_execution=False):
    """
    When used it needs: from inspect import currentframe, getframeinfo

    This function is used for restarting time-consuming data processing
    programs/steps from the last saved point.

    This static variable must be defined before the first call:

        program_state_stack.PROGRAM_STATE_VARIABLES = {"local_var_i", "local_var_j", "local_var_h", "local_var_g"}

    It contains the local variables, at any level of the stack, that uniquely
    define the state (flow/logic) of the program. It is assumed that the
    processed data is saved at each step and is independent of the variables
    that uniquely define the state (flow/logic) of the program.

    All variables that are used in more than one step must be calculated before
    the "if program_state_stack(locals(), getframeinfo(currentframe())):" call;
    it is assumed that they are not time-consuming. Passing processed data from
    one step to the next is done only through files.

    The first call must provide "file_name_of_saved_state". The subsequent calls
    of the form "if program_state_stack(locals(), getframeinfo(currentframe())):"
    demarcate the blocks of processing steps that take a long time (hours/days).

    Example of initialization:

        program_state_stack.PROGRAM_STATE_VARIABLES = {"local_var_i", "local_var_j", "local_var_h", "local_var_g"}
        program_state_stack(locals(), getframeinfo(currentframe()), "my_state.json")

    Then regular usage in the data analysis program:

        if program_state_stack(locals(), getframeinfo(currentframe())):
            data_analysis_1()
        if program_state_stack(locals(), getframeinfo(currentframe())):
            data_analysis_2()
    """
    import os
    from traceback import extract_stack
    from mpi import mpi_comm_rank, mpi_bcast, MPI_COMM_WORLD, MPI_INT
    from inspect import currentframe, getframeinfo

    def get_current_stack_info():
        return [[x[0], x[2]] for x in extract_stack()[:-2]]

    START_EXECUTING_FALSE = 0
    START_EXECUTING_TRUE = 1
    START_EXECUTING_ONLY_ONE_TIME_THEN_REVERT = 2

    current_state = dict()
    for var in program_state_stack.PROGRAM_STATE_VARIABLES & set(full_current_state):
        current_state[var] = full_current_state[var]

    if "restart_location_title" in program_state_stack.__dict__:
        location_in_program = frame_info.filename + "___" + program_state_stack.restart_location_title
        del program_state_stack.restart_location_title
    else:
        location_in_program = frame_info.filename + "___" + str(frame_info.lineno) + "_" + last_call
    current_state["location_in_program"] = location_in_program

    current_stack = get_current_stack_info()

    error_status = 0
    # not a real while: an "if" with the possibility of jumping out with break.
    # The body runs once for the master; the "else" clause of the while runs for
    # every other process, because the condition is false for them from the start.
    while mpi_comm_rank(MPI_COMM_WORLD) == 0:
        if "file_name_of_saved_state" not in program_state_stack.__dict__:
            if type(file_name_of_saved_state) != type(""):
                error_status = ("Must provide the file name of saved state as a string in the first call of the function!", getframeinfo(currentframe()))
                break
            program_state_stack.file_name_of_saved_state = os.getcwd() + os.sep + file_name_of_saved_state
            program_state_stack.counter = 0
            program_state_stack.track_stack = get_current_stack_info()
            program_state_stack.track_state = [dict() for i in xrange(len(program_state_stack.track_stack))]
            program_state_stack.track_state[-1] = current_state

            file_name_of_saved_state_contains_information = False
            if os.path.exists(file_name_of_saved_state):
                statinfo = os.stat(file_name_of_saved_state)
                file_name_of_saved_state_contains_information = statinfo.st_size > 0

            if file_name_of_saved_state_contains_information:
                program_state_stack.saved_stack, \
                program_state_stack.saved_state = restore_program_stack_and_state(file_name_of_saved_state)
                program_state_stack.start_executing = START_EXECUTING_FALSE
            else:
                # check to see if the file can be created
                f = open(file_name_of_saved_state, "w"); f.close()
                program_state_stack.start_executing = START_EXECUTING_TRUE
        else:
            program_state_stack.counter += 1
            # print "counter: ", program_state_stack.counter
            if program_state_stack.counter == program_state_stack.CCC:
                # error_status = ("Reached %d calls!"%program_state_stack.CCC, getframeinfo(currentframe()))
                error_status = 1
                break

            if program_state_stack.start_executing == START_EXECUTING_ONLY_ONE_TIME_THEN_REVERT:
                program_state_stack.start_executing = START_EXECUTING_FALSE

            # correct track_state to reflect track_stack
            for i in xrange(len(current_stack)):
                if i < len(program_state_stack.track_state):
                    if program_state_stack.track_stack[i] != current_stack[i]:
                        program_state_stack.track_state[i] = dict()
                else:
                    program_state_stack.track_state.append(dict())
            # relies on "i" keeping its last loop value: update the deepest frame
            program_state_stack.track_state[i] = current_state

            # correct track_stack to reflect current_stack
            program_state_stack.track_stack = current_stack

            # delete additional elements in track_state so that the size of track_state is the same as current_stack
            program_state_stack.track_state[len(current_stack):len(program_state_stack.track_state)] = []

            if program_state_stack.start_executing == START_EXECUTING_TRUE or last_call != "" or force_starting_execution:
                store_program_state(program_state_stack.file_name_of_saved_state, program_state_stack.track_state, current_stack)
                program_state_stack.start_executing = START_EXECUTING_TRUE
            else:
                if len(program_state_stack.saved_state) >= len(current_stack):
                    for i in range(len(program_state_stack.saved_state)):
                        if i < len(current_stack):
                            if program_state_stack.track_stack[i] == current_stack[i]:
                                if program_state_stack.track_state[i] == program_state_stack.saved_state[i]:
                                    continue
                        break
                    else:
                        # the whole current stack matches the saved state:
                        # execute this one block, then revert to skipping
                        program_state_stack.start_executing = START_EXECUTING_ONLY_ONE_TIME_THEN_REVERT
                        # print "Entering function: ", location_in_program
                        break
                else:
                    program_state_stack.start_executing = START_EXECUTING_TRUE
                    # print "Start executing: ", location_in_program
        break  # single pass: always leave the "while" here
    else:
        ## needs to be initialized for all processes except the master
        program_state_stack.start_executing = START_EXECUTING_FALSE

    if_error_then_all_processes_exit_program(error_status)

    program_state_stack.start_executing = mpi_bcast(program_state_stack.start_executing, 1, MPI_INT, 0, MPI_COMM_WORLD)
    program_state_stack.start_executing = int(program_state_stack.start_executing[0])
    return program_state_stack.start_executing
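# Illustrative sketch (not part of the original module): how a driver would
# use program_state_stack for restartable processing, following the docstring
# above. data_analysis_1/data_analysis_2 and "my_state.json" are placeholders:
def restartable_driver_demo():
    from inspect import currentframe, getframeinfo
    program_state_stack.PROGRAM_STATE_VARIABLES = {"local_var_i", "local_var_j"}
    # first call registers the state file
    program_state_stack(locals(), getframeinfo(currentframe()), "my_state.json")
    local_var_i = 0
    local_var_j = 0
    if program_state_stack(locals(), getframeinfo(currentframe())):
        data_analysis_1()  # hypothetical long-running step; skipped on restart if already done
    local_var_i += 1
    if program_state_stack(locals(), getframeinfo(currentframe())):
        data_analysis_2()  # hypothetical long-running step
    # a non-empty last_call marks the final checkpoint
    program_state_stack(locals(), getframeinfo(currentframe()), last_call="LastCall")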
def resample(prjfile, outdir, bufprefix, nbufvol, nvol, seedbase,
             delta, d, snr, CTF, npad,
             MPI, myid, ncpu, verbose = 0):
    from utilities import even_angles
    from random import seed, jumpahead, shuffle
    from time import time            # added: time() is used below
    import os
    from sys import exit

    nprj = EMUtil.get_image_count(prjfile)

    if MPI:
        from mpi import mpi_barrier, MPI_COMM_WORLD
        if myid == 0:
            if os.path.exists(outdir):
                nx = 1
            else:
                nx = 0
        else:
            nx = 0
        ny = bcast_number_to_all(nx, source_node = 0)
        if ny == 1:
            ERROR('Output directory exists, please change the name and restart the program', "resample", 1, myid)
        mpi_barrier(MPI_COMM_WORLD)
        if myid == 0:
            os.mkdir(outdir)
        mpi_barrier(MPI_COMM_WORLD)
    else:
        if os.path.exists(outdir):
            ERROR('Output directory exists, please change the name and restart the program', "resample", 1, 0)
        os.mkdir(outdir)

    if verbose == 1:
        finfo = open(os.path.join(outdir, "progress%04d.txt" % myid), "w")
    else:
        finfo = None
    #print " before evenangles", myid
    from utilities import getvec
    from numpy import array, reshape, zeros   # zeros added: it is used below
    refa = even_angles(delta)
    nrefa = len(refa)
    refnormal = zeros((nrefa, 3), 'float32')

    tetref = [0.0]*nrefa
    for i in xrange(nrefa):
        tr = getvec(refa[i][0], refa[i][1])
        for j in xrange(3): refnormal[i][j] = tr[j]
        tetref[i] = refa[i][1]
    del refa

    vct = array([0.0]*(3*nprj), 'float32')
    if myid == 0:
        print " will read ", myid
        tr = EMUtil.get_all_attributes(prjfile, 'xform.projection')
        tetprj = [0.0]*nprj
        for i in xrange(nprj):
            temp = tr[i].get_params("spider")
            tetprj[i] = temp["theta"]
            if tetprj[i] > 90.0: tetprj[i] = 180.0 - tetprj[i]
            vct[3*i+0] = tr[i].at(2, 0)
            vct[3*i+1] = tr[i].at(2, 1)
            vct[3*i+2] = tr[i].at(2, 2)
        del tr
    else:
        tetprj = [0.0]*nprj
    #print " READ ", myid

    if MPI:
        #print " will bcast", myid
        from mpi import mpi_bcast, MPI_FLOAT, MPI_COMM_WORLD
        vct = mpi_bcast(vct, len(vct), MPI_FLOAT, 0, MPI_COMM_WORLD)
        from utilities import bcast_list_to_all
        tetprj = bcast_list_to_all(tetprj, myid, 0)
    #print " reshape ", myid
    vct = reshape(vct, (nprj, 3))

    # assign every projection to the closest reference direction within the
    # theta window dspn
    assignments = [[] for i in xrange(nrefa)]
    dspn = 1.25*delta
    for k in xrange(nprj):
        best_s = -1.0
        best_i = -1
        for i in xrange(nrefa):
            if abs(tetprj[k] - tetref[i]) <= dspn:
                s = abs(refnormal[i][0]*vct[k][0] + refnormal[i][1]*vct[k][1] + refnormal[i][2]*vct[k][2])
                if s > best_s:
                    best_s = s
                    best_i = i
        assignments[best_i].append(k)

    am = len(assignments[0])
    mufur = 1.0/am
    for i in xrange(1, len(assignments)):
        ti = len(assignments[i])
        am = min(am, ti)
        if ti > 0: mufur += 1.0/ti

    del tetprj, tetref

    dp = 1.0 - d  # keep that many in each direction
    keep = int(am*dp + 0.5)
    mufur = keep*nrefa/(1.0 - mufur*keep/float(nrefa))

    if myid == 0:
        print " Number of projections ", nprj, ".  Number of reference directions ", nrefa, ",  multiplicative factor for the variance ", mufur
        print " Minimum number of assignments ", am, "  Number of projections used per stratum ", keep, "  Number of projections in resampled structure ", int(am*dp + 0.5)*nrefa
        if am < 2 or am == keep:
            print "incorrect settings"
            exit()  #   FIX: only the master exits here; the other processes will keep waiting

    if seedbase < 1:
        seed()
        jumpahead(17*myid + 123)
    else:
        seed(seedbase)
        jumpahead(17*myid + 123)

    volfile = os.path.join(outdir, "bsvol%04d.hdf" % myid)
    from random import randint
    niter = nvol/ncpu/nbufvol
    for kiter in xrange(niter):
        if verbose == 1:
            finfo.write("Iteration %d: \n" % kiter)
            finfo.flush()

        iter_start = time()
        # the following has to be converted to resample: mults=1 means take the
        # given projection, mults=0 means omit it
        mults = [[0]*nprj for i in xrange(nbufvol)]
        for i in xrange(nbufvol):
            for l in xrange(nrefa):
                mass = assignments[l][:]
                shuffle(mass)
                mass = mass[:keep]
                mass.sort()
                #print l, " * ", mass
                for k in xrange(keep):
                    mults[i][mass[k]] = 1
            '''
            lout = []
            for l in xrange(len(mults[i])):
                if mults[i][l] == 1:
                    lout.append(l)
            write_text_file(lout, os.path.join(outdir, "list%04d_%03d.txt" %(i, myid)))
            del lout
            '''
        del mass

        rectors, fftvols, wgtvols = resample_prepare(prjfile, nbufvol, snr, CTF, npad)
        resample_insert(bufprefix, fftvols, wgtvols, mults, CTF, npad, finfo)
        del mults
        resample_finish(rectors, fftvols, wgtvols, volfile, kiter, nprj, finfo)
        rectors = None
        fftvols = None
        wgtvols = None
        if verbose == 1:
            finfo.write("time for iteration: %10.3f\n" % (time() - iter_start))
            finfo.flush()
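# Illustrative sketch (not part of the original module): the core of the
# resampling above is a stratified bootstrap -- for each reference direction,
# a random subset of "keep" projections is retained and marked with 1 in a
# multiplicity vector. A minimal standalone version of that selection
# (stratified_mults is a hypothetical name):
def stratified_mults(assignments, nprj, keep):
    from random import shuffle
    mults = [0] * nprj
    for stratum in assignments:      # one stratum per reference direction
        mass = stratum[:]
        shuffle(mass)
        for k in mass[:keep]:        # keep a random subset of each stratum
            mults[k] = 1
    return mults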
from Numeric import *
import mpi
import sys
import math

#print "before",len(sys.argv),sys.argv
sys.argv = mpi.mpi_init(len(sys.argv), sys.argv)
#print "after ",len(sys.argv),sys.argv

myid = mpi.mpi_comm_rank(mpi.MPI_COMM_WORLD)
numnodes = mpi.mpi_comm_size(mpi.MPI_COMM_WORLD)
print "hello from ", myid, " of ", numnodes

# split the world communicator into even- and odd-rank groups
color = myid % 2
new_comm = mpi.mpi_comm_split(mpi.MPI_COMM_WORLD, color, myid)
new_id = mpi.mpi_comm_rank(new_comm)
new_nodes = mpi.mpi_comm_size(new_comm)

# rank 0 of each new communicator broadcasts its color to its group
zero_one = -1
if new_id == 0:
    zero_one = color
zero_one = mpi.mpi_bcast(zero_one, 1, mpi.MPI_INT, 0, new_comm)
zero_one = zero_one[0]  # the wrapper returns an array; take the scalar

if zero_one == 0:
    print myid, " part of even processor communicator ", new_id
if zero_one == 1:
    print myid, " part of odd processor communicator ", new_id

print "old_id=", myid, "new_id=", new_id
mpi.mpi_finalize()
##### start up remote tasks ####
# Fragment: this script assumes mpi has been imported and initialized above,
# that "array" comes from Numeric, that getcwd comes from os, and that
# "copies" (the number of workers to spawn) is defined earlier.
toRun = getcwd() + "/worker.py"
print mpi.mpi_get_processor_name(), "starting", toRun
newcom1 = mpi.mpi_comm_spawn(toRun, "from_P_", copies, mpi.MPI_INFO_NULL, 0, mpi.MPI_COMM_WORLD)
errors = mpi.mpi_array_of_errcodes()
print "errors=", errors
newcom1Size = mpi.mpi_comm_size(newcom1)
print "newcom1Size", newcom1Size, " yes it is strange but it should be 1"

##### bcast ####
x = array(([1, 2, 3, 4]), "i")
count = 4
print "head starting bcast", x
junk = mpi.mpi_bcast(x, count, mpi.MPI_INT, mpi.MPI_ROOT, newcom1)
print "head did bcast"

##### scatter ####
scat = array([10, 20, 30], "i")
junk = mpi.mpi_scatter(scat, 1, mpi.MPI_INT, 1, mpi.MPI_INT, mpi.MPI_ROOT, newcom1)

##### send/recv ####
for i in range(0, copies):
    k = (i + 1) * 100
    mpi.mpi_send(k, 1, mpi.MPI_INT, i, 1234, newcom1)
    back = mpi.mpi_recv(1, mpi.MPI_INT, i, 5678, newcom1)
    print "from ", i, back

##### reduce ####
# Worker-side script, started via mpi_comm_spawn from the head script above;
# it talks to the parent through the inter-communicator returned by
# mpi_comm_get_parent(). Imports added; stamp() is assumed to be a timestamp
# helper defined elsewhere in the original sources.
import sys
import mpi
from Numeric import array

sys.argv = mpi.mpi_init(len(sys.argv), sys.argv)
myid = mpi.mpi_comm_rank(mpi.MPI_COMM_WORLD)
numprocs = mpi.mpi_comm_size(mpi.MPI_COMM_WORLD)
parent = mpi.mpi_comm_get_parent()
parentSize = mpi.mpi_comm_size(parent)
print "parentSize", parentSize

tod = stamp()  # assumed timestamp helper, not defined in this file
s = sys.argv[1] + "%2.2d" % myid
print "hello from python worker", myid, " writing to ", s

x = array([5, 3, 4, 2], 'i')
print "starting bcast"
buffer = mpi.mpi_bcast(x, 4, mpi.MPI_INT, 0, parent)
out = open(s, "w")
out.write(str(buffer))
out.write(tod + "\n")
out.close()
print myid, " got ", buffer

junk = mpi.mpi_scatter(x, 1, mpi.MPI_INT, 1, mpi.MPI_INT, 0, parent)
print myid, " got scatter ", junk

# echo protocol with the head: receive on tag 1234, reply on tag 5678
back = mpi.mpi_recv(1, mpi.MPI_INT, 0, 1234, parent)
back[0] = back[0] + 1
mpi.mpi_send(back, 1, mpi.MPI_INT, 0, 5678, parent)

dummy = myid
final = mpi.mpi_reduce(dummy, 1, mpi.MPI_INT, mpi.MPI_SUM, 0, parent)