def main(args):
    # load particles
    particles = dataset.load_particles(args.mrcs, datadir=args.datadir)
    log(particles.shape)
    Nimg, D, D = particles.shape

    trans = utils.load_pkl(args.trans)
    if type(trans) is tuple:
        trans = trans[1]
    trans *= args.tscale
    assert np.all(
        trans <= 1
    ), "ERROR: Old pose format detected. Translations must be in units of fraction of box."
    trans *= D  # convert to pixels
    assert len(trans) == Nimg

    # 2D frequency coordinates of the box, in units of 1/pixel
    xx, yy = np.meshgrid(np.arange(-D / 2, D / 2), np.arange(-D / 2, D / 2))
    TCOORD = np.stack([xx, yy], axis=2) / D  # DxDx2

    imgs = []
    for ii in range(Nimg):
        ff = fft.fft2_center(particles[ii])
        # Fourier shift theorem: multiply by exp(-2*pi*i*k.t) to translate
        tfilt = np.dot(TCOORD, trans[ii]) * -2 * np.pi
        tfilt = np.cos(tfilt) + np.sin(tfilt) * 1j
        ff *= tfilt
        img = fft.ifftn_center(ff)
        imgs.append(img)

    imgs = np.asarray(imgs).astype(np.float32)
    mrc.write(args.o, imgs)

    if args.out_png:
        plot_projections(args.out_png, imgs[:9])
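
# Illustrative sketch (not part of the original script): the loop above applies the
# Fourier shift theorem, multiplying F(k) by exp(-2*pi*i*k.t) to translate an image
# in real space. A minimal numpy-only check of the same identity, using an integer
# shift so the result can be compared against a circular np.roll:
def _demo_fourier_shift_theorem(D=8, tx=2, ty=1):
    rng = np.random.default_rng(0)
    img = rng.standard_normal((D, D))
    k = np.fft.fftfreq(D)  # frequency coordinates in cycles/pixel
    ky, kx = np.meshgrid(k, k, indexing='ij')
    phase = np.exp(-2j * np.pi * (kx * tx + ky * ty))
    shifted = np.fft.ifft2(np.fft.fft2(img) * phase).real
    # an integer-pixel Fourier shift must agree with a circular roll
    assert np.allclose(shifted, np.roll(img, (ty, tx), axis=(0, 1)))
    return shifted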
def main(args):
    assert args.input.endswith('.mrc'), "Input volume must be .mrc file"
    assert args.o.endswith('.mrc'), "Output volume must be .mrc file"
    x, h = mrc.parse_mrc(args.input)
    x = x[::-1]
    mrc.write(args.o, x, header=h)
    log(f'Wrote {args.o}')
def mask_volume(volpath, outpath, Apix, thresh=None, dilate=3, dist=10):
    '''
    Helper function to generate a loose mask around the input density.
    The density is thresholded (by default at half the 99.99th-percentile
    intensity), dilated outwards, and a soft cosine edge is applied.

    Inputs
        volpath: an absolute path to the volume to be used for masking
        outpath: an absolute path to write out the masked volume .mrc
        Apix: pixel size in Angstroms, written into the output header
        thresh: intensity threshold to apply; if None, defaults to half the
            99.99th-percentile intensity
        dilate: how many iterations to dilate the thresholded density outwards
        dist: how far (in voxels) the cosine edge extends from the density

    Outputs
        The masked volume written to outpath
    '''
    vol = mrc.parse_mrc(volpath)[0]
    thresh = np.percentile(vol, 99.99) / 2 if thresh is None else thresh
    x = (vol >= thresh)
    x = binary_dilation(x, iterations=dilate)
    y = distance_transform_edt(~x)
    y[y > dist] = dist
    z = np.cos(np.pi * y / dist / 2)  # cosine falloff from 1 to 0 over dist voxels

    # check that the mask is in range [0,1]
    assert np.all(z >= 0)
    assert np.all(z <= 1)

    # previously the mask was written out separately; now apply it and save the
    # masked volume directly to minimize future I/O
    # mrc.write(outpath, z.astype(np.float32))
    vol *= z
    mrc.write(outpath, vol.astype(np.float32), Apix=Apix)
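
# Example usage sketch (the paths and pixel size below are hypothetical,
# for illustration only):
#   mask_volume('/path/to/vol.mrc', '/path/to/vol.masked.mrc', Apix=1.0,
#               dilate=3, dist=10)
# With the defaults, voxels above half the 99.99th-percentile intensity are kept,
# grown by 3 voxels of binary dilation, then rolled off to zero over a 10-voxel
# cosine edge.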
def main(args):
    x = dataset.load_particles(args.input, lazy=True)
    log(f'Loaded {len(x)} particles')
    ind = utils.load_pkl(args.ind)
    x = np.array([x[i].get() for i in ind])
    log(f'New stack dimensions: {x.shape}')
    mrc.write(args.o, x)
def main(args):
    assert args.input.endswith('.mrc'), "Input volume must be .mrc file"
    assert args.o.endswith('.mrc'), "Output volume must be .mrc file"
    x, h = mrc.parse_mrc(args.input)
    h.update_apix(args.apix)
    if args.invert:
        x *= -1
    if args.flip:
        x = x[::-1]
    mrc.write(args.o, x, header=h)
    log(f'Wrote {args.o}')
def save_checkpoint(model, lattice, optim, epoch, norm, Apix, out_mrc, out_weights):
    model.eval()
    if isinstance(model, nn.DataParallel):
        model = model.module
    vol = model.eval_volume(lattice.coords, lattice.D, lattice.extent, norm)
    mrc.write(out_mrc, vol.astype(np.float32), Apix=Apix)
    torch.save({
        'norm': norm,
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optim.state_dict(),
    }, out_weights)
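
# Sketch of reloading the checkpoint written above (assumes the same model and
# optimizer classes are re-instantiated first; the path is hypothetical):
def _demo_load_checkpoint(model, optim, path='weights.pkl'):
    ckpt = torch.load(path)
    model.load_state_dict(ckpt['model_state_dict'])
    optim.load_state_dict(ckpt['optimizer_state_dict'])
    # 'norm' and 'epoch' travel with the weights so training or eval can resume
    return ckpt['norm'], ckpt['epoch']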
def make_mask(outdir, K, dilate, thresh, in_mrc=None):
    if in_mrc is None:
        if thresh is None:
            thresh = []
            for i in range(K):
                vol = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0]
                thresh.append(np.percentile(vol, 99.99) / 2)
            thresh = np.mean(thresh)
        log(f'Threshold: {thresh}')
        log(f'Dilating mask by: {dilate}')

        def binary_mask(vol):
            x = (vol >= thresh).astype(bool)
            x = binary_dilation(x, iterations=dilate)
            return x

        # combine all masks by taking their union (via De Morgan's law)
        vol = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_000.mrc')[0]
        mask = ~binary_mask(vol)
        for i in range(1, K):
            vol = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0]
            mask *= ~binary_mask(vol)
        mask = ~mask
    else:
        # load the provided .mrc and convert to a boolean mask
        mask, _ = mrc.parse_mrc(in_mrc)
        mask = mask.astype(bool)

    # save mask
    out_mrc = f'{outdir}/mask.mrc'
    log(f'Saving {out_mrc}')
    mrc.write(out_mrc, mask.astype(np.float32))

    # view slices
    out_png = f'{outdir}/mask_slices.png'
    D = mask.shape[0]
    fig, ax = plt.subplots(1, 3, figsize=(10, 8))
    ax[0].imshow(mask[D // 2, :, :])
    ax[1].imshow(mask[:, D // 2, :])
    ax[2].imshow(mask[:, :, D // 2])
    plt.savefig(out_png)
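
# Minimal numpy check (illustration only) of the union-by-De-Morgan trick used
# above: ~(~a & ~b) == a | b, with & realized as boolean multiplication.
def _demo_mask_union():
    a = np.array([True, True, False, False])
    b = np.array([True, False, True, False])
    union = ~(~a * ~b)
    assert np.array_equal(union, a | b)
    return union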
def main(args):
    imgs = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    ctf_params = utils.load_pkl(args.ctf_params)
    assert len(imgs) == len(ctf_params)
    D = imgs[0].get().shape[0]
    fx, fy = np.meshgrid(np.linspace(-.5, .5, D, endpoint=False),
                         np.linspace(-.5, .5, D, endpoint=False))
    freqs = np.stack([fx.ravel(), fy.ravel()], 1)
    imgs_flip = np.empty((len(imgs), D, D), dtype=np.float32)
    for i in range(len(imgs)):
        if i % 1000 == 0:
            print(i)
        c = ctf.compute_ctf_np(freqs / ctf_params[i, 0], *ctf_params[i, 1:])
        c = c.reshape((D, D))
        ff = fft.fft2_center(imgs[i].get())
        ff *= np.sign(c)  # phase flip by the sign of the CTF
        img = fft.ifftn_center(ff)
        imgs_flip[i] = img.astype(np.float32)
    mrc.write(args.o, imgs_flip)
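
# Toy illustration (not the repo's ctf.compute_ctf_np): phase flipping multiplies
# each Fourier component by the sign of the CTF, so the corrected spectrum is
# |CTF|-weighted but no longer oscillates in sign.
def _demo_phase_flip_1d():
    s = np.linspace(0, .5, 100)   # spatial frequency, cycles/pixel
    ctf_1d = -np.sin(30 * s**2)   # toy oscillating CTF
    ft = np.ones_like(s)          # flat "signal" spectrum
    corrupted = ft * ctf_1d
    flipped = corrupted * np.sign(ctf_1d)
    assert np.all(flipped >= 0)   # all sign oscillations resolved
    return flipped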
def main(args):
    check_inputs(args)
    t1 = dt.now()

    ## set the device
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    log('Use cuda {}'.format(use_cuda))
    if not use_cuda:
        log('WARNING: No GPUs detected')

    log(args)
    cfg = config.overwrite_config(args.config, args)
    log('Loaded configuration:')
    pprint.pprint(cfg)

    D = cfg['lattice_args']['D']  # image size + 1
    zdim = cfg['model_args']['zdim']
    norm = cfg['dataset_args']['norm']

    if args.downsample:
        assert args.downsample % 2 == 0, "Boxsize must be even"
        assert args.downsample <= D - 1, "Must be smaller than original box size"

    model, lattice = HetOnlyVAE.load(cfg, args.weights, device=device)
    model.eval()

    ### Multiple z ###
    if args.z_start or args.zfile:
        ### Get z values
        if args.z_start:
            args.z_start = np.array(args.z_start)
            args.z_end = np.array(args.z_end)
            z = np.repeat(np.arange(args.n, dtype=np.float32), zdim).reshape(
                (args.n, zdim))
            z *= ((args.z_end - args.z_start) / (args.n - 1))
            z += args.z_start
        else:
            z = np.loadtxt(args.zfile).reshape(-1, zdim)

        if not os.path.exists(args.o):
            os.makedirs(args.o)

        log(f'Generating {len(z)} volumes')
        for i, zz in enumerate(z):
            log(zz)
            if args.downsample:
                extent = lattice.extent * (args.downsample / (D - 1))
                vol = model.decoder.eval_volume(
                    lattice.get_downsample_coords(args.downsample + 1),
                    args.downsample + 1, extent, norm, zz)
            else:
                vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                                lattice.extent, norm, zz)
            out_mrc = '{}/{}{:03d}.mrc'.format(args.o, args.prefix, i)
            if args.flip:
                vol = vol[::-1]
            if args.invert:
                vol *= -1
            mrc.write(out_mrc, vol.astype(np.float32), Apix=args.Apix)

    ### Single z ###
    else:
        z = np.array(args.z)
        log(z)
        if args.downsample:
            extent = lattice.extent * (args.downsample / (D - 1))
            vol = model.decoder.eval_volume(
                lattice.get_downsample_coords(args.downsample + 1),
                args.downsample + 1, extent, norm, z)
        else:
            vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                            lattice.extent, norm, z)
        if args.flip:
            vol = vol[::-1]
        if args.invert:
            vol *= -1
        mrc.write(args.o, vol.astype(np.float32), Apix=args.Apix)

    td = dt.now() - t1
    log('Finished in {}'.format(td))
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert (args.o.endswith('.mrcs') or args.o.endswith('.mrc')
            ), "Must specify output in .mrc(s) file format"

    lazy = not args.is_vol
    old = dataset.load_particles(args.mrcs,
                                 lazy=lazy,
                                 datadir=args.datadir,
                                 relion31=args.relion31)

    oldD = old[0].get().shape[0] if lazy else old.shape[-1]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    def _combine_imgs(imgs):
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(
                    cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def downsample_images(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            oldft = np.asarray(p.map(fft.ht2_center, imgs))
            newft = oldft[:, start:stop, start:stop]
            new = np.asarray(p.map(fft.iht2_center, newft))
        return new

    def downsample_in_batches(old, b):
        new = np.empty((len(old), D, D), dtype=np.float32)
        for ii in range(math.ceil(len(old) / b)):
            log(f'Processing batch {ii}')
            new[ii * b:(ii + 1) * b, :, :] = downsample_images(
                old[ii * b:(ii + 1) * b])
        return new

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(old)
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = downsample_in_batches(old, args.b)
        log(new.shape)
        log('Saving {}'.format(args.o))
        mrc.write(args.o, new.astype(np.float32), is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        nchunks = math.ceil(len(old) / args.chunk)
        out_mrcs = [
            '.{}'.format(i).join(os.path.splitext(args.o))
            for i in range(nchunks)
        ]
        chunk_names = [os.path.basename(x) for x in out_mrcs]
        for i in range(nchunks):
            log('Processing chunk {}'.format(i))
            chunk = old[i * args.chunk:(i + 1) * args.chunk]
            new = downsample_in_batches(chunk, args.b)
            log(new.shape)
            log(f'Saving {out_mrcs[i]}')
            mrc.write(out_mrcs[i], new, is_vol=False)

        # Write a text file with all chunks
        out_txt = '{}.txt'.format(os.path.splitext(args.o)[0])
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
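
# Numpy-only sketch (independent of fft.ht2_center/iht2_center) of downsampling
# by cropping a centered Fourier transform, the same principle the helpers above
# implement with Hartley transforms. Assumes even D_old and D_new:
def _demo_fourier_crop(img, D_new):
    D_old = img.shape[0]
    start = D_old // 2 - D_new // 2
    stop = D_old // 2 + D_new // 2
    ft = np.fft.fftshift(np.fft.fft2(img))   # move the zero frequency to the center
    ft_cropped = ft[start:stop, start:stop]  # keep only the low frequencies
    out = np.fft.ifft2(np.fft.ifftshift(ft_cropped)).real
    return out * (D_new / D_old) ** 2        # renormalize for the size change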
def main(args):
    assert args.o.endswith('.mrc')
    t1 = time.time()
    log(args)
    if not os.path.exists(os.path.dirname(args.o)):
        os.makedirs(os.path.dirname(args.o))

    ## set the device
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        log('WARNING: No GPUs detected')

    # load the particles
    if args.tilt is None:
        data = dataset.LazyMRCData(args.particles,
                                   norm=(0, 1),
                                   invert_data=args.invert_data,
                                   datadir=args.datadir)
        tilt = None
    else:
        data = dataset.TiltMRCData(args.particles,
                                   args.tilt,
                                   norm=(0, 1),
                                   invert_data=args.invert_data,
                                   datadir=args.datadir)
        tilt = torch.tensor(utils.xrot(args.tilt_deg).astype(np.float32))
    D = data.D
    Nimg = data.N

    lattice = Lattice(D, extent=D // 2)

    posetracker = PoseTracker.load(args.poses, Nimg, D, None, None)

    if args.ctf is not None:
        log('Loading ctf params from {}'.format(args.ctf))
        ctf_params = ctf.load_ctf_for_training(D - 1, args.ctf)
        ctf_params = torch.tensor(ctf_params)
    else:
        ctf_params = None
    Apix = ctf_params[0, 0] if ctf_params is not None else 1

    V = torch.zeros((D, D, D))
    counts = torch.zeros((D, D, D))

    mask = lattice.get_circular_mask(D // 2)

    if args.ind:
        iterator = pickle.load(open(args.ind, 'rb'))
    elif args.first:
        args.first = min(args.first, Nimg)
        iterator = range(args.first)
    else:
        iterator = range(Nimg)

    for ii in iterator:
        if ii % 100 == 0:
            log('image {}'.format(ii))
        r, t = posetracker.get_pose(ii)
        ff = data.get(ii)
        if tilt is not None:
            ff, ff_tilt = ff  # unpack the (image, tilt image) pair
        ff = torch.tensor(ff)
        ff = ff.view(-1)[mask]
        if ctf_params is not None:
            freqs = lattice.freqs2d / ctf_params[ii, 0]
            c = ctf.compute_ctf(freqs, *ctf_params[ii, 1:]).view(-1)[mask]
            ff *= c.sign()
        if t is not None:
            ff = lattice.translate_ht(ff.view(1, -1), t.view(1, 1, 2),
                                      mask).view(-1)
        ff_coord = lattice.coords[mask] @ r
        add_slice(V, counts, ff_coord, ff, D)

        # tilt series
        if args.tilt is not None:
            ff_tilt = torch.tensor(ff_tilt)
            ff_tilt = ff_tilt.view(-1)[mask]
            if ctf_params is not None:
                ff_tilt *= c.sign()
            if t is not None:
                ff_tilt = lattice.translate_ht(ff_tilt.view(1, -1),
                                               t.view(1, 1, 2), mask).view(-1)
            ff_coord = lattice.coords[mask] @ tilt @ r
            add_slice(V, counts, ff_coord, ff_tilt, D)

    td = time.time() - t1
    log('Backprojected {} images in {}s ({}s per image)'.format(
        len(iterator), td, td / len(iterator)))
    counts[counts == 0] = 1
    V /= counts
    V = fft.ihtn_center(V[0:-1, 0:-1, 0:-1].cpu().numpy())
    mrc.write(args.o, V.astype('float32'), Apix=Apix)
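
# Hedged sketch (not the repo's add_slice) of the backprojection insertion step:
# each image's transform coefficients are accumulated into the nearest 3D voxel
# given the rotated slice coordinates, with counts kept for later normalization.
# Coordinate conventions and interpolation scheme here are illustrative assumptions.
def _demo_add_slice(V, counts, coords, values, D):
    # coords: (N, 3) numpy array in [-0.5, 0.5]; map to indices of a D^3 grid
    idx = np.round((coords + .5) * (D - 1)).astype(int)
    valid = np.all((idx >= 0) & (idx < D), axis=1)
    for (x, y, z), v in zip(idx[valid], values[valid]):
        V[z, y, x] += v
        counts[z, y, x] += 1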
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert (args.o.endswith('.mrcs') or args.o.endswith('.txt')
            ), "Must specify output in .mrcs or .txt file format"

    # load images
    lazy = args.lazy
    images = dataset.load_particles(args.mrcs,
                                    lazy=lazy,
                                    datadir=args.datadir,
                                    relion31=args.relion31)

    # filter images
    if args.ind is not None:
        log(f'Filtering image dataset with {args.ind}')
        ind = utils.load_pkl(args.ind).astype(int)
        images = [images[i] for i in ind] if lazy else images[ind]

    original_D = images[0].get().shape[0] if lazy else images.shape[-1]
    log(f'Loading {len(images)} {original_D}x{original_D} images')
    window = args.window
    invert_data = args.invert_data
    downsample = (args.D and args.D < original_D)
    if downsample:
        assert args.D <= original_D, f'New box size {args.D} cannot be larger than the original box size {original_D}'
        assert args.D % 2 == 0, 'New box size must be even'
        start = int(original_D / 2 - args.D / 2)
        stop = int(original_D / 2 + args.D / 2)
        D = args.D
        log(f'Downsampling images to {D}x{D}')
    else:
        D = original_D

    def _combine_imgs(imgs):
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(
                    cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def preprocess(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            # todo: refactor as a routine in dataset.py
            # note: applying the window before downsampling is slightly
            # different than in the original workflow
            if window:
                imgs *= dataset.window_mask(original_D, args.window_r, .99)
            ret = np.asarray(p.map(fft.ht2_center, imgs))
            if invert_data:
                ret *= -1
            if downsample:
                ret = ret[:, start:stop, start:stop]
            ret = fft.symmetrize_ht(ret)
        return ret

    def preprocess_in_batches(imgs, b):
        ret = np.empty((len(imgs), D + 1, D + 1), dtype=np.float32)
        Nbatches = math.ceil(len(imgs) / b)
        for ii in range(Nbatches):
            log(f'Processing batch of {b} images ({ii+1} of {Nbatches})')
            ret[ii * b:(ii + 1) * b, :, :] = preprocess(
                imgs[ii * b:(ii + 1) * b])
        return ret

    nchunks = math.ceil(len(images) / args.chunk)
    out_mrcs = [
        f'.{i}.ft'.join(os.path.splitext(args.o)) for i in range(nchunks)
    ]
    chunk_names = [os.path.basename(x) for x in out_mrcs]
    for i in range(nchunks):
        log(f'Processing chunk {i+1} of {nchunks}')
        chunk = images[i * args.chunk:(i + 1) * args.chunk]
        new = preprocess_in_batches(chunk, args.b)
        log(f'New shape: {new.shape}')
        log(f'Saving {out_mrcs[i]}')
        mrc.write(out_mrcs[i], new, is_vol=False)

    out_txt = f'{os.path.splitext(args.o)[0]}.ft.txt'
    log(f'Saving summary txt file {out_txt}')
    with open(out_txt, 'w') as f:
        f.write('\n'.join(chunk_names))
def analyze_volumes(outdir,
                    K,
                    dim,
                    M,
                    linkage,
                    vol_ind=None,
                    plot_dim=5,
                    particle_ind_orig=None):
    cmap = choose_cmap(M)

    # load the mean volume, computing it if it does not exist
    if not os.path.exists(f'{outdir}/kmeans{K}/vol_mean.mrc'):
        volm = np.array([
            mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0]
            for i in range(K)
        ]).mean(axis=0)
        mrc.write(f'{outdir}/kmeans{K}/vol_mean.mrc', volm)
    else:
        volm = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_mean.mrc')[0]

    # load mask
    mask = mrc.parse_mrc(f'{outdir}/mask.mrc')[0].astype(bool)
    log(f'{mask.sum()} voxels in mask')

    # load volumes
    vols = np.array([
        mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0][mask]
        for i in range(K)
    ])
    vols[vols < 0] = 0

    # load umap
    umap = utils.load_pkl(f'{outdir}/umap.pkl')
    ind = np.loadtxt(f'{outdir}/kmeans{K}/centers_ind.txt').astype(int)

    if vol_ind is not None:
        log(f'Filtering to {len(vol_ind)} volumes')
        vols = vols[vol_ind]
        ind = ind[vol_ind]

    # compute PCA
    pca = PCA(dim)
    pca.fit(vols)
    pc = pca.transform(vols)
    utils.save_pkl(pc, f'{outdir}/vol_pca_{K}.pkl')
    utils.save_pkl(pca, f'{outdir}/vol_pca_obj.pkl')
    log('Explained variance ratio:')
    log(pca.explained_variance_ratio_)

    # save reaction coordinates
    for i in range(plot_dim):
        subdir = f'{outdir}/vol_pcs/pc{i+1}'
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        min_, max_ = pc[:, i].min(), pc[:, i].max()
        log((min_, max_))
        for j, val in enumerate(np.linspace(min_, max_, 10, endpoint=True)):
            v = volm.copy()
            v[mask] += pca.components_[i] * val
            mrc.write(f'{subdir}/{j}.mrc', v)

    # plot consecutive pairs of volume PCs
    def plot(i, j):
        plt.figure()
        plt.scatter(pc[:, i], pc[:, j])
        plt.xlabel(f'Volume PC{i+1} (EV: {pca.explained_variance_ratio_[i]:.3f})')
        plt.ylabel(f'Volume PC{j+1} (EV: {pca.explained_variance_ratio_[j]:.3f})')
        plt.savefig(f'{outdir}/vol_pca_{K}_{i+1}_{j+1}.png')

    for i in range(plot_dim - 1):
        plot(i, i + 1)

    # clustering
    subdir = f'{outdir}/clustering_L2_{linkage}_{M}'
    if not os.path.exists(subdir):
        os.makedirs(subdir)
    cluster = AgglomerativeClustering(n_clusters=M,
                                      affinity='euclidean',
                                      linkage=linkage)
    labels = cluster.fit_predict(vols)
    utils.save_pkl(labels, f'{subdir}/state_labels.pkl')

    kmeans_labels = utils.load_pkl(f'{outdir}/kmeans{K}/labels.pkl')
    kmeans_counts = Counter(kmeans_labels)
    for i in range(M):
        vol_i = np.where(labels == i)[0]
        log(f'State {i}: {len(vol_i)} volumes')
        if vol_ind is not None:
            vol_i = np.arange(K)[vol_ind][vol_i]
        vol_i_all = np.array([
            mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{v:03d}.mrc')[0]
            for v in vol_i
        ])
        nparticles = np.array([kmeans_counts[v] for v in vol_i])
        vol_i_mean = np.average(vol_i_all, axis=0, weights=nparticles)
        vol_i_std = np.average((vol_i_all - vol_i_mean)**2,
                               axis=0,
                               weights=nparticles)**.5
        mrc.write(f'{subdir}/state_{i}_mean.mrc', vol_i_mean.astype(np.float32))
        mrc.write(f'{subdir}/state_{i}_std.mrc', vol_i_std.astype(np.float32))
        if not os.path.exists(f'{subdir}/state_{i}'):
            os.makedirs(f'{subdir}/state_{i}')
        for v in vol_i:
            os.symlink(f'{outdir}/kmeans{K}/vol_{v:03d}.mrc',
                       f'{subdir}/state_{i}/vol_{v:03d}.mrc')
        particle_ind = analysis.get_ind_for_cluster(kmeans_labels, vol_i)
        log(f'State {i}: {len(particle_ind)} particles')
        if particle_ind_orig is not None:
            utils.save_pkl(particle_ind_orig[particle_ind],
                           f'{subdir}/state_{i}_particle_ind.pkl')
        else:
            utils.save_pkl(particle_ind, f'{subdir}/state_{i}_particle_ind.pkl')

    # plot clustering results
    def hack_barplot(counts_):
        if M <= 20:  # hack to get the desired colors
            with sns.color_palette(cmap):
                g = sns.barplot(np.arange(M), counts_)
        else:  # default is husl
            g = sns.barplot(np.arange(M), counts_)
        return g

    plt.figure()
    counts = Counter(labels)
    g = hack_barplot([counts[i] for i in range(M)])
    for i in range(M):
        g.text(i - .1, counts[i] + 2, counts[i])
    plt.xlabel('State')
    plt.ylabel('Count')
    plt.savefig(f'{subdir}/state_volume_counts.png')

    plt.figure()
    particle_counts = [
        np.sum([kmeans_counts[ii] for ii in np.where(labels == i)[0]])
        for i in range(M)
    ]
    g = hack_barplot(particle_counts)
    for i in range(M):
        g.text(i - .1, particle_counts[i] + 2, particle_counts[i])
    plt.xlabel('State')
    plt.ylabel('Count')
    plt.savefig(f'{subdir}/state_particle_counts.png')

    def plot_w_labels(i, j):
        plt.figure()
        plt.scatter(pc[:, i], pc[:, j], c=labels, cmap=cmap)
        plt.xlabel(f'Volume PC{i+1} (EV: {pca.explained_variance_ratio_[i]:.3f})')
        plt.ylabel(f'Volume PC{j+1} (EV: {pca.explained_variance_ratio_[j]:.3f})')
        plt.savefig(f'{subdir}/vol_pca_{K}_{i+1}_{j+1}.png')

    for i in range(plot_dim - 1):
        plot_w_labels(i, i + 1)

    def plot_w_labels_annotated(i, j):
        fig, ax = plt.subplots(figsize=(16, 16))
        plt.scatter(pc[:, i], pc[:, j], c=labels, cmap=cmap)
        annots = np.arange(K)
        if vol_ind is not None:
            annots = annots[vol_ind]
        for ii, k in enumerate(annots):
            ax.annotate(str(k), pc[ii, [i, j]] + np.array([.1, .1]))
        plt.xlabel(f'Volume PC{i+1} (EV: {pca.explained_variance_ratio_[i]:.3f})')
        plt.ylabel(f'Volume PC{j+1} (EV: {pca.explained_variance_ratio_[j]:.3f})')
        plt.savefig(f'{subdir}/vol_pca_{K}_annotated_{i+1}_{j+1}.png')

    for i in range(plot_dim - 1):
        plot_w_labels_annotated(i, i + 1)

    # plot clusters on UMAP
    umap_i = umap[ind]
    fig, ax = plt.subplots(figsize=(8, 8))
    plt.scatter(umap[:, 0],
                umap[:, 1],
                alpha=.1,
                s=1,
                rasterized=True,
                color='lightgrey')
    colors = get_colors_for_cmap(cmap, M)
    for i in range(M):
        c = umap_i[np.where(labels == i)]
        plt.scatter(c[:, 0], c[:, 1], label=i, color=colors[i])
    plt.legend()
    plt.xlabel('UMAP1')
    plt.ylabel('UMAP2')
    plt.savefig(f'{subdir}/umap.png')

    fig, ax = plt.subplots(figsize=(16, 16))
    plt.scatter(umap[:, 0],
                umap[:, 1],
                alpha=.1,
                s=1,
                rasterized=True,
                color='lightgrey')
    plt.scatter(umap_i[:, 0], umap_i[:, 1], c=labels, cmap=cmap)
    annots = np.arange(K)
    if vol_ind is not None:
        annots = annots[vol_ind]
    for i, k in enumerate(annots):
        ax.annotate(str(k), umap_i[i] + np.array([.1, .1]))
    plt.xlabel('UMAP1')
    plt.ylabel('UMAP2')
    plt.savefig(f'{subdir}/umap_annotated.png')
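
# Side note (illustrative, not part of the pipeline): stepping the mean volume
# along pca.components_[i], as done in the reaction-coordinates loop above, is
# closely related to sklearn's inverse_transform of a coordinate vector that is
# zero except in component i. A small self-contained check of that identity:
def _demo_pc_traversal(vols_2d, dim=2):
    from sklearn.decomposition import PCA
    pca = PCA(dim)
    pca.fit(vols_2d)    # rows are flattened (masked) volumes
    coord = np.zeros((1, dim))
    coord[0, 0] = 1.5   # 1.5 units along PC1
    v1 = pca.inverse_transform(coord)[0]
    v2 = pca.mean_ + 1.5 * pca.components_[0]
    assert np.allclose(v1, v2)
    return v1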
def main(args):
    log(args)
    torch.set_grad_enabled(False)
    use_cuda = torch.cuda.is_available()
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    t1 = time.time()

    ref, _ = mrc.parse_mrc(args.ref)
    log('Loaded {} volume'.format(ref.shape))
    vol, _ = mrc.parse_mrc(args.vol)
    log('Loaded {} volume'.format(vol.shape))

    projector = VolumeAligner(vol, vol_ref=ref, maxD=args.max_D, flip=args.flip)
    if use_cuda:
        projector.use_cuda()

    r_resol = args.r_resol
    quats = so3_grid.grid_SO3(r_resol)
    q_id = np.arange(len(quats))
    q_id = np.stack([q_id // (6 * 2**r_resol), q_id % (6 * 2**r_resol)], -1)
    rots = GridPose(quats, q_id)

    t_resol = 0
    T_EXTENT = vol.shape[0] / 16 if args.t_extent is None else args.t_extent
    T_NGRID = args.t_grid
    trans = shift_grid3.base_shift_grid(T_EXTENT, T_NGRID)
    t_id = np.stack(shift_grid3.get_base_id(np.arange(len(trans)), T_NGRID), -1)
    trans = GridPose(trans, t_id)

    max_keep_r = args.keep_r
    max_keep_t = args.keep_t
    #rot_tracker = MinPoseTracker(max_keep_r, 4, 2)
    #tr_tracker = MinPoseTracker(max_keep_t, 3, 3)

    for it in range(args.niter):
        log('Iteration {}'.format(it))
        log('Generating {} rotations'.format(len(rots)))
        log('Generating {} translations'.format(len(trans)))
        pose_err = np.empty((len(rots), len(trans)), dtype=np.float32)
        #rot_tracker.clear()
        #tr_tracker.clear()
        r_iterator = data.DataLoader(rots, batch_size=args.rb, shuffle=False)
        t_iterator = data.DataLoader(trans, batch_size=args.tb, shuffle=False)
        r_it = 0
        for rot, r_id in r_iterator:
            if use_cuda:
                rot = rot.cuda()
            vr, vi = projector.rotate(rot)
            t_it = 0
            for tr, t_id in t_iterator:
                if use_cuda:
                    tr = tr.cuda()
                vtr, vti = projector.translate(
                    vr, vi, tr.expand(rot.size(0), *tr.shape))  # todo: check volume
                err = projector.compute_err(vtr, vti)  # R x T
                pose_err[r_it:r_it + len(rot),
                         t_it:t_it + len(tr)] = err.cpu().numpy()
                #r_err = err.min(1)[0]
                #min_r_err, min_r_i = r_err.sort()
                #rot_tracker.add(min_r_err[:max_keep_r], rot[min_r_i][:max_keep_r], r_id[min_r_i][:max_keep_r])
                #t_err = err.min(0)[0]
                #min_t_err, min_t_i = t_err.sort()
                #tr_tracker.add(min_t_err[:max_keep_t], tr[min_t_i][:max_keep_t], t_id[min_t_i][:max_keep_t])
                t_it += len(tr)
            r_it += len(rot)

        # keep the best-scoring poses and subdivide their grid neighborhoods
        r_err = pose_err.min(1)
        r_err_argmin = r_err.argsort()[:max_keep_r]
        t_err = pose_err.min(0)
        t_err_argmin = t_err.argsort()[:max_keep_t]
        #r = rots.pose[r_err_argmin[0]]
        #t = trans.pose[t_err_argmin[0]]
        #log('Best rot: {}'.format(r))
        #log('Best trans: {}'.format(t))
        #vr, vi = projector_full.rotate(torch.tensor(r).unsqueeze(0))
        #vr, vi = projector_full.translate(vr, vi, torch.tensor(t).view(1,1,3))
        #err = projector_full.compute_err(vr, vi)
        #w = np.where(r_err[r_err_argmin] > err.item())[0]
        rots, rots_id = subdivide_r(rots.pose[r_err_argmin],
                                    rots.pose_id[r_err_argmin], r_resol)
        rots = GridPose(rots, rots_id)
        trans, trans_id = subdivide_t(trans.pose_id[t_err_argmin], t_resol,
                                      T_EXTENT, T_NGRID)
        trans = GridPose(trans, trans_id)
        r_resol += 1
        t_resol += 1
        vlog(r_err[r_err_argmin])
        vlog(t_err[t_err_argmin])
        #log(rot_tracker.min_errs)
        #log(tr_tracker.min_errs)

    r = rots.pose[r_err_argmin[0]]
    t = trans.pose[t_err_argmin[0]] * vol.shape[0] / args.max_D
    log('Best rot: {}'.format(r))
    log('Best trans: {}'.format(t))

    t *= 2 / vol.shape[0]
    projector = VolumeAligner(vol, vol_ref=ref, maxD=vol.shape[0], flip=args.flip)
    if use_cuda:
        projector.use_cuda()
    vr = projector.real_tform(
        torch.tensor(r).unsqueeze(0),
        torch.tensor(t).view(1, 1, 3))
    v = vr.squeeze().cpu().numpy()
    log('Saving {}'.format(args.o))
    mrc.write(args.o, v.astype(np.float32))
    td = time.time() - t1
    log('Finished in {}s'.format(td))
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert (args.o.endswith('.mrcs') or args.o.endswith('.mrc')
            ), "Must specify output in .mrc(s) file format"

    old = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    oldD = old[0].get().shape[0]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(np.array([x.get() for x in old]))
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = []
        for i in range(len(old)):
            if i % 1000 == 0:
                log(f'Processing image {i} of {len(old)}')
            img = old[i]
            oldft = fft.ht2_center(img.get()).astype(np.float32)
            newft = oldft[start:stop, start:stop]
            new.append(fft.ihtn_center(newft).astype(np.float32))
            assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2),
                                                                int(D / 2)]
        new = np.asarray(new)
        log(new.shape)
        log('Saving {}'.format(args.o))
        mrc.write(args.o, new, is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        chunk_names = []
        nchunks = math.ceil(len(old) / args.chunk)
        for i in range(nchunks):
            log('Processing chunk {}'.format(i))
            out_mrcs = '.{}'.format(i).join(os.path.splitext(args.o))
            new = []
            for img in old[i * args.chunk:(i + 1) * args.chunk]:
                oldft = fft.ht2_center(img.get()).astype(np.float32)
                newft = oldft[start:stop, start:stop]
                new.append(fft.ihtn_center(newft).astype(np.float32))
                assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2),
                                                                    int(D / 2)]
            new = np.asarray(new)
            log(new.shape)
            log(f'Saving {out_mrcs}')
            mrc.write(out_mrcs, new, is_vol=False)
            chunk_names.append(os.path.basename(out_mrcs))

        # Write a text file with all chunks
        out_txt = '{}.txt'.format(os.path.splitext(args.o)[0])
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
def main(args):
    check_inputs(args)
    t1 = dt.now()

    ## set the device
    use_cuda = torch.cuda.is_available()
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    if args.config is not None:
        args = config.load_config(args.config, args)
    log(args)

    if args.downsample:
        assert args.downsample % 2 == 0, "Boxsize must be even"
        assert args.downsample < args.D, "Must be smaller than original box size"

    D = args.D + 1
    lattice = Lattice(D, extent=args.l_extent)
    if args.enc_mask:
        args.enc_mask = lattice.get_circular_mask(args.enc_mask)
        in_dim = args.enc_mask.sum()
    else:
        in_dim = lattice.D**2
    model = HetOnlyVAE(lattice,
                       args.qlayers,
                       args.qdim,
                       args.players,
                       args.pdim,
                       in_dim,
                       args.zdim,
                       encode_mode=args.encode_mode,
                       enc_mask=args.enc_mask,
                       enc_type=args.pe_type,
                       enc_dim=args.pe_dim,
                       domain=args.domain)

    log('Loading weights from {}'.format(args.weights))
    checkpoint = torch.load(args.weights)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    ### Multiple z ###
    if args.z_start or args.zfile:
        ### Get z values
        if args.z_start:
            args.z_start = np.array(args.z_start)
            args.z_end = np.array(args.z_end)
            z = np.repeat(np.arange(args.n, dtype=np.float32),
                          args.zdim).reshape((args.n, args.zdim))
            z *= ((args.z_end - args.z_start) / (args.n - 1))
            z += args.z_start
        else:
            z = np.loadtxt(args.zfile).reshape(-1, args.zdim)

        if not os.path.exists(args.o):
            os.makedirs(args.o)

        log(f'Generating {len(z)} volumes')
        for i, zz in enumerate(z):
            log(zz)
            if args.downsample:
                extent = lattice.extent * (args.downsample / args.D)
                vol = model.decoder.eval_volume(
                    lattice.get_downsample_coords(args.downsample + 1),
                    args.downsample + 1, extent, args.norm, zz)
            else:
                vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                                lattice.extent, args.norm, zz)
            out_mrc = '{}/{}{:03d}.mrc'.format(args.o, args.prefix, i)
            if args.flip:
                vol = vol[::-1]
            mrc.write(out_mrc, vol.astype(np.float32), Apix=args.Apix)

    ### Single z ###
    else:
        z = np.array(args.z)
        log(z)
        if args.downsample:
            extent = lattice.extent * (args.downsample / args.D)
            vol = model.decoder.eval_volume(
                lattice.get_downsample_coords(args.downsample + 1),
                args.downsample + 1, extent, args.norm, z)
        else:
            vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                            lattice.extent, args.norm, z)
        if args.flip:
            vol = vol[::-1]
        mrc.write(args.o, vol.astype(np.float32), Apix=args.Apix)

    td = dt.now() - t1
    log('Finished in {}'.format(td))
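
# Quick numpy check (illustration only) that the repeat/scale/offset construction
# above is just a linear path from z_start to z_end with n waypoints:
def _demo_z_trajectory(z_start, z_end, n, zdim):
    z_start = np.asarray(z_start, dtype=np.float32)
    z_end = np.asarray(z_end, dtype=np.float32)
    z = np.repeat(np.arange(n, dtype=np.float32), zdim).reshape((n, zdim))
    z *= (z_end - z_start) / (n - 1)
    z += z_start
    assert np.allclose(z, np.linspace(z_start, z_end, n))
    return z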
def main(args):
    for out in (args.o, args.out_png, args.out_pose):
        if not out:
            continue
        mkbasedir(out)
        warnexists(out)

    if args.t_extent == 0.:
        log('Not shifting images')
    else:
        assert args.t_extent > 0

    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

    use_cuda = torch.cuda.is_available()
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    t1 = time.time()
    vol, _ = mrc.parse_mrc(args.mrc)
    log('Loaded {} volume'.format(vol.shape))

    if args.tilt:
        theta = args.tilt * np.pi / 180
        args.tilt = np.array([[1., 0., 0.],
                              [0, np.cos(theta), -np.sin(theta)],
                              [0, np.sin(theta), np.cos(theta)]]).astype(np.float32)

    projector = Projector(vol, args.tilt)
    if use_cuda:
        projector.lattice = projector.lattice.cuda()
        projector.vol = projector.vol.cuda()

    if args.grid is not None:
        rots = GridRot(args.grid)
        log('Generating {} rotations at resolution level {}'.format(
            len(rots), args.grid))
    else:
        log('Generating {} random rotations'.format(args.N))
        rots = RandomRot(args.N)

    log('Projecting...')
    imgs = []
    iterator = data.DataLoader(rots, batch_size=args.b)
    for i, rot in enumerate(iterator):
        vlog('Projecting {}/{}'.format((i + 1) * len(rot), args.N))
        projections = projector.project(rot)
        projections = projections.cpu().numpy()
        imgs.append(projections)
    rots = rots.rots.cpu().numpy()
    imgs = np.vstack(imgs)
    td = time.time() - t1
    log('Projected {} images in {}s ({}s per image)'.format(
        args.N, td, td / args.N))

    if args.t_extent:
        log('Shifting images between +/- {} pixels'.format(args.t_extent))
        trans = np.random.rand(args.N, 2) * 2 * args.t_extent - args.t_extent
        imgs = np.asarray([translate_img(img, t) for img, t in zip(imgs, trans)])
        # convention: we want the first column to be x shift and second column to be y shift
        # reverse columns since the current implementation of translate_img uses scipy's
        # fourier_shift, which is flipped the other way
        # convention: save the translation that centers the image
        trans = -trans[:, ::-1]
        # convert translation from pixels to fraction of the box
        D = imgs.shape[-1]
        assert D % 2 == 0
        trans /= D

    log('Saving {}'.format(args.o))
    mrc.write(args.o, imgs.astype(np.float32))
    log('Saving {}'.format(args.out_pose))
    with open(args.out_pose, 'wb') as f:
        if args.t_extent:
            pickle.dump((rots, trans), f)
        else:
            pickle.dump(rots, f)
    if args.out_png:
        log('Saving {}'.format(args.out_png))
        plot_projections(args.out_png, imgs[:9])
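
# Hedged sketch of the sign convention noted in the comments above, assuming
# translate_img wraps scipy.ndimage.fourier_shift (as the comments state):
# shifting by t and then by -t (the stored "centering" translation, with its
# columns un-reversed) recovers the original image.
def _demo_translation_convention():
    from scipy.ndimage import fourier_shift
    rng = np.random.default_rng(0)
    img = rng.standard_normal((8, 8))
    t = np.array([2., 1.])  # shift in scipy's (row, col) order
    shifted = np.fft.ifftn(fourier_shift(np.fft.fftn(img), t)).real
    undone = np.fft.ifftn(fourier_shift(np.fft.fftn(shifted), -t)).real
    assert np.allclose(undone, img)
    return undone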