def dataset_loading_test(params, visualize=False): imgpath = params['inpath'] psize = params['resolution'] imgstk = MRCImageStack(imgpath, psize) # if params.get('float_images', True): # imgstk.float_images() ctfpath = params['ctfpath'] mscope_params = params['microscope_params'] ctfstk = CTFStack(ctfpath, mscope_params) cryodata = CryoDataset(imgstk, ctfstk) cryodata.compute_noise_statistics() # if params.get('window_images',True): # imgstk.window_images() cryodata.divide_dataset(params['minisize'], params['test_imgs'], params['partition'], params['num_partitions'], params['random_seed']) # cryodata.set_datasign(params.get('datasign', 'auto')) # if params.get('normalize_data',True): # cryodata.normalize_dataset() # voxel_size = cryodata.pixel_size N = cryodata.imgstack.get_num_pixels() fspace_stack = FourierStack(cryodata.imgstack, caching = True, zeropad=1) premult = cryoops.compute_premultiplier(N + 2 * int(1 * (N/2)), 'lanczos', 8) premult = premult.reshape((-1,1)) * premult.reshape((1,-1)) fspace_stack.set_transform(premult, 1) if visualize: rad = 0.99 coords = geometry.gencoords(N, 2).reshape((N**2, 2)) Cs = np.sum(coords**2, axis=1).reshape((N, N)) > (rad * N / 2.0 - 1.5)**2 idx = np.random.randint(cryodata.imgstack.num_images) normalized = cryodata.imgstack.get_image(1) f_normalized = fspace_stack.get_image(1) plot_noise_histogram(normalized, f_normalized, rmask=~Cs, fmask=None, plot_unmask=False) plt.show() return cryodata, fspace_stack
def genphantomdata(N_D, phantompath, ctfparfile): mscope_params = { 'akv': 200, 'wgh': 0.07, 'cs': 2.0, 'psize': 2.8, 'bfactor': 500.0 } N = 128 rad = 0.95 shift_sigma = 3.0 sigma_noise = 25.0 M_totalmass = 80000 kernel = 'lanczos' ksize = 6 premult = cryoops.compute_premultiplier(N, kernel, ksize) tic = time.time() N_D = int(N_D) N = int(N) rad = float(rad) psize = mscope_params['psize'] bfactor = mscope_params['bfactor'] shift_sigma = float(shift_sigma) sigma_noise = float(sigma_noise) M_totalmass = float(M_totalmass) srcctf_stack = CTFStack(ctfparfile, mscope_params) genctf_stack = GeneratedCTFStack( mscope_params, parfields=['PHI', 'THETA', 'PSI', 'SHX', 'SHY']) TtoF = sincint.gentrunctofull(N=N, rad=rad) Cmap = n.sort( n.random.random_integers(0, srcctf_stack.get_num_ctfs() - 1, N_D)) M = mrc.readMRC(phantompath) cryoem.window(M, 'circle') M[M < 0] = 0 if M_totalmass is not None: M *= M_totalmass / M.sum() V = density.real_to_fspace( premult.reshape((1, 1, -1)) * premult.reshape( (1, -1, 1)) * premult.reshape((-1, 1, 1)) * M) print "Generating data..." sys.stdout.flush() imgdata = n.empty((N_D, N, N), dtype=density.real_t) pardata = {'R': [], 't': []} prevctfI = None for i, srcctfI in enumerate(Cmap): ellapse_time = time.time() - tic remain_time = float(N_D - i) * ellapse_time / max(i, 1) print "\r%.2f Percent.. (Elapsed: %s, Remaining: %s) " % ( i / float(N_D) * 100.0, format_timedelta(ellapse_time), format_timedelta(remain_time)), sys.stdout.flush() # Get the CTF for this image cCTF = srcctf_stack.get_ctf(srcctfI) if prevctfI != srcctfI: genctfI = genctf_stack.add_ctf(cCTF) C = cCTF.dense_ctf(N, psize, bfactor).reshape((N**2, )) prevctfI = srcctfI # Randomly generate the viewing direction/shift pt = n.random.randn(3) pt /= n.linalg.norm(pt) psi = 2 * n.pi * n.random.rand() EA = geom.genEA(pt)[0] EA[2] = psi shift = n.random.randn(2) * shift_sigma R = geom.rotmat3D_EA(*EA)[:, 0:2] slop = cryoops.compute_projection_matrix([R], N, kernel, ksize, rad, 'rots') S = cryoops.compute_shift_phases(shift.reshape((1, 2)), N, rad)[0] D = slop.dot(V.reshape((-1, ))) D *= S imgdata[i] = density.fspace_to_real((C * TtoF.dot(D)).reshape( (N, N))) + n.require(n.random.randn(N, N) * sigma_noise, dtype=density.real_t) genctf_stack.add_img(genctfI, PHI=EA[0] * 180.0 / n.pi, THETA=EA[1] * 180.0 / n.pi, PSI=EA[2] * 180.0 / n.pi, SHX=shift[0], SHY=shift[1]) pardata['R'].append(R) pardata['t'].append(shift) print "\rDone in ", time.time() - tic, " seconds." return imgdata, genctf_stack, pardata, mscope_params
def genphantomdata(N_D, phantompath): mscope_params = { 'akv': 200, 'wgh': 0.07, 'cs': 2.0, 'psize': 3.0, 'bfactor': 500.0 } M = mrc.readMRC(phantompath) N = M.shape[0] rad = 0.95 M_totalmass = 1000000 # M_totalmass = 1500000 kernel = 'lanczos' ksize = 6 tic = time.time() N_D = int(N_D) N = int(N) rad = float(rad) psize = mscope_params['psize'] bfactor = mscope_params['bfactor'] M_totalmass = float(M_totalmass) ctfparfile = 'particle/examplectfs.par' srcctf_stack = CTFStack(ctfparfile, mscope_params) genctf_stack = GeneratedCTFStack( mscope_params, parfields=['PHI', 'THETA', 'PSI', 'SHX', 'SHY']) TtoF = sincint.gentrunctofull(N=N, rad=rad) Cmap = np.sort( np.random.random_integers(0, srcctf_stack.get_num_ctfs() - 1, N_D)) cryoem.window(M, 'circle') M[M < 0] = 0 if M_totalmass is not None: M *= M_totalmass / M.sum() # oversampling oversampling_factor = 3 psize = psize * oversampling_factor V = density.real_to_fspace_with_oversampling(M, oversampling_factor) fM = V.real**2 + V.imag**2 # mrc.writeMRC('particle/EMD6044_fM_totalmass_{}_oversampling_{}.mrc'.format(str(int(M_totalmass)).zfill(5), oversampling_factor), fM, psz=psize) print("Generating data...") sys.stdout.flush() imgdata = np.empty((N_D, N, N), dtype=density.real_t) pardata = {'R': []} prevctfI = None coords = geometry.gencoords(N, 2, rad) slicing_func = RegularGridInterpolator((np.arange(N), ) * 3, fM, bounds_error=False, fill_value=0.0) for i, srcctfI in enumerate(Cmap): ellapse_time = time.time() - tic remain_time = float(N_D - i) * ellapse_time / max(i, 1) print("\r%.2f Percent.. (Elapsed: %s, Remaining: %s)" % (i / float(N_D) * 100.0, format_timedelta(ellapse_time), format_timedelta(remain_time)), end='') sys.stdout.flush() # Get the CTF for this image cCTF = srcctf_stack.get_ctf(srcctfI) if prevctfI != srcctfI: genctfI = genctf_stack.add_ctf(cCTF) C = cCTF.dense_ctf(N, psize, bfactor).reshape((N, N)) prevctfI = srcctfI # Randomly generate the viewing direction/shift pt = np.random.randn(3) pt /= np.linalg.norm(pt) psi = 2 * np.pi * np.random.rand() EA = geometry.genEA(pt)[0] EA[2] = psi # Rotate coordinates and get slice image by interpolation R = geometry.rotmat3D_EA(*EA)[:, 0:2] rotated_coords = R.dot(coords.T).T + int(N / 2) slice_data = slicing_func(rotated_coords) intensity = TtoF.dot(slice_data) np.maximum(intensity, 0.0, out=intensity) # Add poisson noise img = np.float_(np.random.poisson(intensity.reshape(N, N))) np.maximum(1e-8, img, out=img) imgdata[i] = np.require(img, dtype=density.real_t) genctf_stack.add_img(genctfI, PHI=EA[0] * 180.0 / np.pi, THETA=EA[1] * 180.0 / np.pi, PSI=EA[2] * 180.0 / np.pi, SHX=0.0, SHY=0.0) pardata['R'].append(R) print("\n\rDone in ", time.time() - tic, " seconds.") return imgdata, genctf_stack, pardata, mscope_params
def __init__(self, expbase, cmdparams=None): """cryodata is a CryoData instance. expbase is a path to the base of folder where this experiment's files will be stored. The folder above expbase will also be searched for .params files. These will be loaded first.""" BackgroundWorker.__init__(self) # Create a background thread which handles IO self.io_queue = Queue() self.io_thread = Thread(target=self.ioworker) self.io_thread.daemon = True self.io_thread.start() # General setup ---------------------------------------------------- self.expbase = expbase self.outbase = None # Paramter setup --------------------------------------------------- # search above expbase for params files _,_,filenames = os.walk(opj(expbase,'../')).next() self.paramfiles = [opj(opj(expbase,'../'), fname) \ for fname in filenames if fname.endswith('.params')] # search expbase for params files _,_,filenames = os.walk(opj(expbase)).next() self.paramfiles += [opj(expbase,fname) \ for fname in filenames if fname.endswith('.params')] if 'local.params' in filenames: self.paramfiles += [opj(expbase,'local.params')] # load parameter files self.params = Params(self.paramfiles) self.cparams = None if cmdparams is not None: # Set parameter specified on the command line for k,v in cmdparams.iteritems(): self.params[k] = v # Dataset setup ------------------------------------------------------- self.imgpath = self.params['inpath'] psize = self.params['resolution'] if not isinstance(self.imgpath,list): imgstk = MRCImageStack(self.imgpath,psize) else: imgstk = CombinedImageStack([MRCImageStack(cimgpath,psize) for cimgpath in self.imgpath]) if self.params.get('float_images',True): imgstk.float_images() self.ctfpath = self.params['ctfpath'] mscope_params = self.params['microscope_params'] if not isinstance(self.ctfpath,list): ctfstk = CTFStack(self.ctfpath,mscope_params) else: ctfstk = CombinedCTFStack([CTFStack(cctfpath,mscope_params) for cctfpath in self.ctfpath]) self.cryodata = CryoDataset(imgstk,ctfstk) self.cryodata.compute_noise_statistics() if self.params.get('window_images',True): imgstk.window_images() minibatch_size = self.params['minisize'] testset_size = self.params['test_imgs'] partition = self.params.get('partition',0) num_partitions = self.params.get('num_partitions',1) seed = self.params['random_seed'] if isinstance(partition,str): partition = eval(partition) if isinstance(num_partitions,str): num_partitions = eval(num_partitions) if isinstance(seed,str): seed = eval(seed) self.cryodata.divide_dataset(minibatch_size,testset_size,partition,num_partitions,seed) self.cryodata.set_datasign(self.params.get('datasign','auto')) if self.params.get('normalize_data',True): self.cryodata.normalize_dataset() self.voxel_size = self.cryodata.pixel_size # Iterations setup ------------------------------------------------- self.iteration = 0 self.tic_epoch = None self.num_data_evals = 0 self.eval_params() outdir = self.cparams.get('outdir',None) if outdir is None: if self.cparams.get('num_partitions',1) > 1: outdir = 'partition{0}'.format(self.cparams['partition']) else: outdir = '' self.outbase = opj(self.expbase,outdir) if not os.path.isdir(self.outbase): os.makedirs(self.outbase) # Output setup ----------------------------------------------------- self.ostream = OutputStream(opj(self.outbase,'stdout')) self.ostream(80*"=") self.ostream("Experiment: " + expbase + \ " Kernel: " + self.params['kernel']) self.ostream("Started on " + socket.gethostname() + \ " At: " + time.strftime('%B %d %Y: %I:%M:%S %p')) self.ostream("Git SHA1: " + gitutil.git_get_SHA1()) self.ostream(80*"=") gitutil.git_info_dump(opj(self.outbase, 'gitinfo')) self.startdatetime = datetime.now() # for diagnostics and parameters self.diagout = Output(opj(self.outbase, 'diag'),runningout=False) # for stats (per image etc) self.statout = Output(opj(self.outbase, 'stat'),runningout=True) # for likelihoods of individual images self.likeout = Output(opj(self.outbase, 'like'),runningout=False) self.img_likes = n.empty(self.cryodata.N_D) self.img_likes[:] = n.inf # optimization state vars ------------------------------------------ init_model = self.cparams.get('init_model',None) if init_model is not None: filename = init_model if filename.upper().endswith('.MRC'): M = readMRC(filename) else: with open(filename) as fp: M = cPickle.load(fp) if type(M)==list: M = M[-1]['M'] if M.shape != 3*(self.cryodata.N,): M = cryoem.resize_ndarray(M,3*(self.cryodata.N,),axes=(0,1,2)) else: init_seed = self.cparams.get('init_random_seed',0) + self.cparams.get('partition',0) print "Randomly generating initial density (init_random_seed = {0})...".format(init_seed), ; sys.stdout.flush() tic = time.time() M = cryoem.generate_phantom_density(self.cryodata.N, 0.95*self.cryodata.N/2.0, \ 5*self.cryodata.N/128.0, 30, seed=init_seed) print "done in {0}s".format(time.time() - tic) tic = time.time() print "Windowing and aligning initial density...", ; sys.stdout.flush() # window the initial density wfunc = self.cparams.get('init_window','circle') cryoem.window(M,wfunc) # Center and orient the initial density cryoem.align_density(M) print "done in {0:.2f}s".format(time.time() - tic) # apply the symmetry operator init_sym = get_symmetryop(self.cparams.get('init_symmetry',self.cparams.get('symmetry',None))) if init_sym is not None: tic = time.time() print "Applying symmetry operator...", ; sys.stdout.flush() M = init_sym.apply(M) print "done in {0:.2f}s".format(time.time() - tic) tic = time.time() print "Scaling initial model...", ; sys.stdout.flush() modelscale = self.cparams.get('modelscale','auto') mleDC, _, mleDC_est_std = self.cryodata.get_dc_estimate() if modelscale == 'auto': # Err on the side of a weaker prior by using a larger value for modelscale modelscale = (n.abs(mleDC) + 2*mleDC_est_std)/self.cryodata.N print "estimated modelscale = {0:.3g}...".format(modelscale), ; sys.stdout.flush() self.params['modelscale'] = modelscale self.cparams['modelscale'] = modelscale M *= modelscale/M.sum() print "done in {0:.2f}s".format(time.time() - tic) if mleDC_est_std/n.abs(mleDC) > 0.05: print " WARNING: the DC component estimate has a high relative variance, it may be inaccurate!" if ((modelscale*self.cryodata.N - n.abs(mleDC)) / mleDC_est_std) > 3: print " WARNING: the selected modelscale value is more than 3 std devs different than the estimated one. Be sure this is correct." self.M = n.require(M,dtype=density.real_t) self.fM = density.real_to_fspace(M) self.dM = density.zeros_like(self.M) self.step = eval(self.cparams['optim_algo']) self.step.setup(self.cparams, self.diagout, self.statout, self.ostream) # Objective function setup -------------------------------------------- param_type = self.cparams.get('parameterization','real') cplx_param = param_type in ['complex','complex_coeff','complex_herm_coeff'] self.like_func = eval_objective(self.cparams['likelihood']) self.prior_func = eval_objective(self.cparams['prior']) if self.cparams.get('penalty',None) is not None: self.penalty_func = eval_objective(self.cparams['penalty']) prior_func = SumObjectives(self.prior_func.fspace, \ [self.penalty_func,self.prior_func], None) else: prior_func = self.prior_func self.obj = SumObjectives(cplx_param, [self.like_func,prior_func], [None,None]) self.obj.setup(self.cparams, self.diagout, self.statout, self.ostream) self.obj.set_dataset(self.cryodata) self.obj_wrapper = ObjectiveWrapper(param_type) self.last_save = time.time() self.logpost_history = FiniteRunningSum() self.like_history = FiniteRunningSum() # Importance Samplers ------------------------------------------------- self.is_sym = get_symmetryop(self.cparams.get('is_symmetry',self.cparams.get('symmetry',None))) self.sampler_R = FixedFisherImportanceSampler('_R',self.is_sym) self.sampler_I = FixedFisherImportanceSampler('_I') self.sampler_S = FixedGaussianImportanceSampler('_S') self.like_func.set_samplers(sampler_R=self.sampler_R,sampler_I=self.sampler_I,sampler_S=self.sampler_S)
def genphantomdata(N_D, phantompath, ctfparfile): # mscope_params = {'akv': 200, 'wgh': 0.07, # 'cs': 2.0, 'psize': 2.8, 'bfactor': 500.0} mscope_params = {'akv': 200, 'wgh': 0.07, 'cs': 2.0, 'psize': 3.0, 'bfactor': 500.0} M = mrc.readMRC(phantompath) N = M.shape[0] rad = 0.95 shift_sigma = 3.0 sigma_noise = 25.0 M_totalmass = 80000 kernel = 'lanczos' ksize = 6 premult = cryoops.compute_premultiplier(N, kernel, ksize) tic = time.time() N_D = int(N_D) N = int(N) rad = float(rad) psize = mscope_params['psize'] bfactor = mscope_params['bfactor'] shift_sigma = float(shift_sigma) sigma_noise = float(sigma_noise) M_totalmass = float(M_totalmass) srcctf_stack = CTFStack(ctfparfile, mscope_params) genctf_stack = GeneratedCTFStack(mscope_params, parfields=[ 'PHI', 'THETA', 'PSI', 'SHX', 'SHY']) TtoF = sincint.gentrunctofull(N=N, rad=rad) Cmap = np.sort(np.random.random_integers( 0, srcctf_stack.get_num_ctfs() - 1, N_D)) cryoem.window(M, 'circle') M[M < 0] = 0 if M_totalmass is not None: M *= M_totalmass / M.sum() V = density.real_to_fspace( premult.reshape((1, 1, -1)) * premult.reshape((1, -1, 1)) * premult.reshape((-1, 1, 1)) * M) print("Generating data...") sys.stdout.flush() imgdata = np.empty((N_D, N, N), dtype=density.real_t) pardata = {'R': [], 't': []} prevctfI = None for i, srcctfI in enumerate(Cmap): ellapse_time = time.time() - tic remain_time = float(N_D - i) * ellapse_time / max(i, 1) print("\r%.2f Percent.. (Elapsed: %s, Remaining: %s)" % (i / float(N_D) * 100.0, format_timedelta(ellapse_time), format_timedelta(remain_time))) sys.stdout.flush() # Get the CTF for this image cCTF = srcctf_stack.get_ctf(srcctfI) if prevctfI != srcctfI: genctfI = genctf_stack.add_ctf(cCTF) C = cCTF.dense_ctf(N, psize, bfactor).reshape((N**2,)) prevctfI = srcctfI # Randomly generate the viewing direction/shift pt = np.random.randn(3) pt /= np.linalg.norm(pt) psi = 2 * np.pi * np.random.rand() EA = geometry.genEA(pt)[0] EA[2] = psi shift = np.random.randn(2) * shift_sigma R = geometry.rotmat3D_EA(*EA)[:, 0:2] slop = cryoops.compute_projection_matrix( [R], N, kernel, ksize, rad, 'rots') S = cryoops.compute_shift_phases(shift.reshape((1, 2)), N, rad)[0] D = slop.dot(V.reshape((-1,))) D *= S imgdata[i] = density.fspace_to_real((C * TtoF.dot(D)).reshape((N, N))) + np.require( np.random.randn(N, N) * sigma_noise, dtype=density.real_t) genctf_stack.add_img(genctfI, PHI=EA[0] * 180.0 / np.pi, THETA=EA[1] * 180.0 / np.pi, PSI=EA[2] * 180.0 / np.pi, SHX=shift[0], SHY=shift[1]) pardata['R'].append(R) pardata['t'].append(shift) print("\rDone in ", time.time() - tic, " seconds.") return imgdata, genctf_stack, pardata, mscope_params