Example 1
def main(args):
    # load particles
    particles = dataset.load_particles(args.mrcs, datadir=args.datadir)
    log(particles.shape)
    Nimg, D, D = particles.shape

    trans = utils.load_pkl(args.trans)
    if type(trans) is tuple:
        trans = trans[1]
    trans *= args.tscale
    assert np.all(
        trans <= 1
    ), "ERROR: Old pose format detected. Translations must be in units of fraction of box."
    trans *= D  # convert to pixels
    assert len(trans) == Nimg

    xx, yy = np.meshgrid(np.arange(-D / 2, D / 2), np.arange(-D / 2, D / 2))
    TCOORD = np.stack([xx, yy], axis=2) / D  # DxDx2

    imgs = []
    for ii in range(Nimg):
        ff = fft.fft2_center(particles[ii])
        # phase ramp for a translation of trans[ii] pixels: exp(-2*pi*i * k.t)
        tfilt = np.dot(TCOORD, trans[ii]) * -2 * np.pi
        tfilt = np.cos(tfilt) + np.sin(tfilt) * 1j  # Euler's formula for exp(i*tfilt)
        ff *= tfilt
        img = fft.ifftn_center(ff)
        imgs.append(img)

    imgs = np.asarray(imgs).real.astype(np.float32)  # drop the numerically negligible imaginary part
    mrc.write(args.o, imgs)

    if args.out_png:
        plot_projections(args.out_png, imgs[:9])
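
The loop above relies on the Fourier shift theorem: multiplying the transform by exp(-2*pi*i k.t) translates the image by t pixels. A minimal self-contained check of that identity with plain numpy (the centering conventions of the repo's fft.fft2_center are not reproduced here, so this is an illustration only):

import numpy as np

# A one-pixel shift applied as a phase ramp equals np.roll for a periodic image.
rng = np.random.default_rng(0)
img = rng.standard_normal((8, 8))
ky, kx = np.meshgrid(np.fft.fftfreq(8), np.fft.fftfreq(8), indexing='ij')
phase = np.exp(-2j * np.pi * ky)  # shift by t = (1, 0) pixels
shifted = np.fft.ifft2(np.fft.fft2(img) * phase).real
assert np.allclose(shifted, np.roll(img, 1, axis=0))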
Example 2
def main(args):
    assert args.input.endswith('.mrc'), "Input volume must be .mrc file"
    assert args.o.endswith('.mrc'), "Output volume must be .mrc file"
    x, h = mrc.parse_mrc(args.input)
    x = x[::-1]  # flip along the first (z) axis to invert handedness
    mrc.write(args.o, x, header=h)
    log(f'Wrote {args.o}')
Example 3
def mask_volume(volpath, outpath, Apix, thresh=None, dilate=3, dist=10):
    '''
    Helper function to generate a loose mask around the input density.
    The density is thresholded (by default at half its 99.99th-percentile
    intensity), dilated outwards, and a soft cosine edge is applied.

    Inputs
        volpath: an absolute path to the volume to be used for masking
        outpath: an absolute path to write out the masked volume (.mrc)
        Apix: pixel size in Angstroms, written into the output header
        thresh: absolute intensity threshold to apply (default: half the
            99.99th-percentile intensity)
        dilate: number of binary dilation iterations applied to the
            thresholded density
        dist: how far (in voxels) the cosine edge extends from the density

    Outputs
        The masked volume written to outpath
    '''
    vol = mrc.parse_mrc(volpath)[0]
    thresh = np.percentile(vol, 99.99) / 2 if thresh is None else thresh
    x = vol >= thresh
    x = binary_dilation(x, iterations=dilate)
    y = distance_transform_edt(~x)
    y[y > dist] = dist
    z = np.cos(np.pi * y / dist / 2)

    # check that mask is in range [0,1]
    assert np.all(z >= 0)
    assert np.all(z <= 1)

    # used to write out mask separately from masked volume, now apply and save the masked vol to minimize future I/O
    # mrc.write(outpath, z.astype(np.float32))
    vol *= z
    mrc.write(outpath, vol.astype(np.float32), Apix=Apix)
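
For intuition, the soft edge falls from 1 at the dilated surface to 0 at distance dist; a small standalone sketch of the 1-D profile:

import numpy as np

# The edge matches z = cos(pi * y / dist / 2) above: 1 at y = 0, 0 at y = dist.
dist = 10
y = np.arange(dist + 1)  # distance from the density, in voxels
edge = np.cos(np.pi * y / dist / 2)
print(np.round(edge, 2))  # [1. 0.99 0.95 ... 0.16 0.]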
Example 4
def main(args):
    x = dataset.load_particles(args.input, lazy=True)
    log(f'Loaded {len(x)} particles')
    ind = utils.load_pkl(args.ind)
    x = np.array([x[i].get() for i in ind])
    log(f'New stack dimensions: {x.shape}')
    mrc.write(args.o, x)
Example 5
def main(args):
    assert args.input.endswith('.mrc'), "Input volume must be .mrc file"
    assert args.o.endswith('.mrc'), "Output volume must be .mrc file"
    x, h = mrc.parse_mrc(args.input)
    h.update_apix(args.apix)
    if args.invert:
        x *= -1
    if args.flip:
        x = x[::-1]
    mrc.write(args.o, x, header=h)
    log(f'Wrote {args.o}')
Example 6
def save_checkpoint(model, lattice, optim, epoch, norm, Apix, out_mrc, out_weights):
    model.eval()
    if isinstance(model, nn.DataParallel):
        model = model.module
    vol = model.eval_volume(lattice.coords, lattice.D, lattice.extent, norm)
    mrc.write(out_mrc, vol.astype(np.float32), Apix=Apix)
    torch.save({
        'norm': norm,
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optim.state_dict(),
        }, out_weights)
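
The saved dict can be restored symmetrically. A self-contained sketch with a stand-in nn.Linear model and a hypothetical path; in practice the model and optimizer must be re-instantiated with the same architecture and hyperparameters that were saved:

import torch
import torch.nn as nn

model = nn.Linear(4, 4)
optim = torch.optim.Adam(model.parameters())
torch.save({'norm': (0, 1), 'epoch': 3,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optim.state_dict()}, '/tmp/weights.pkl')

checkpoint = torch.load('/tmp/weights.pkl')
model.load_state_dict(checkpoint['model_state_dict'])
optim.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1  # resume from the next epoch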
Example 7
def make_mask(outdir, K, dilate, thresh, in_mrc=None):
    if in_mrc is None:
        if thresh is None:
            thresh = []
            for i in range(K):
                vol = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0]
                thresh.append(np.percentile(vol, 99.99) / 2)
            thresh = np.mean(thresh)
        log(f'Threshold: {thresh}')
        log(f'Dilating mask by: {dilate}')

        def binary_mask(vol):
            x = (vol >= thresh).astype(bool)
            x = binary_dilation(x, iterations=dilate)
            return x

        # combine all masks by taking their union
        vol = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_000.mrc')[0]
        mask = ~binary_mask(vol)
        for i in range(1, K):
            vol = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0]
            mask *= ~binary_mask(vol)
        mask = ~mask
    else:
        # Load provided mrc and convert to a boolean mask
        mask, _ = mrc.parse_mrc(in_mrc)
        mask = mask.astype(bool)

    # save mask
    out_mrc = f'{outdir}/mask.mrc'
    log(f'Saving {out_mrc}')
    mrc.write(out_mrc, mask.astype(np.float32))

    # view slices
    out_png = f'{outdir}/mask_slices.png'
    D = mask.shape[0]  # vol is not defined when in_mrc is provided
    fig, ax = plt.subplots(1, 3, figsize=(10, 8))
    ax[0].imshow(mask[D // 2, :, :])
    ax[1].imshow(mask[:, D // 2, :])
    ax[2].imshow(mask[:, :, D // 2])
    plt.savefig(out_png)
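
The union loop works by De Morgan's law: intersecting the complements and then complementing gives the union. A standalone check with boolean arrays:

import numpy as np

a = np.array([True, False, True, False])
b = np.array([True, True, False, False])
# `*` on boolean arrays is elementwise AND, as used by `mask *= ~binary_mask(vol)`
assert np.array_equal(~(~a * ~b), a | b)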
Example 8
def main(args):
    imgs = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    ctf_params = utils.load_pkl(args.ctf_params)
    assert len(imgs) == len(ctf_params)

    D = imgs[0].get().shape[0]
    fx, fy = np.meshgrid(np.linspace(-.5, .5, D, endpoint=False),
                         np.linspace(-.5, .5, D, endpoint=False))
    freqs = np.stack([fx.ravel(), fy.ravel()], 1)

    imgs_flip = np.empty((len(imgs), D, D), dtype=np.float32)
    for i in range(len(imgs)):
        if i % 1000 == 0: log(f'Processing image {i} of {len(imgs)}')
        c = ctf.compute_ctf_np(freqs / ctf_params[i, 0], *ctf_params[i, 1:])
        c = c.reshape((D, D))
        ff = fft.fft2_center(imgs[i].get())
        ff *= np.sign(c)
        img = fft.ifftn_center(ff)
        imgs_flip[i] = img.real.astype(np.float32)  # drop the numerically negligible imaginary part

    mrc.write(args.o, imgs_flip)
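
Multiplying by the sign of the CTF ("phase flipping") undoes the contrast reversals past each CTF zero without touching amplitudes. An illustration with made-up values:

import numpy as np

c = np.array([0.9, 0.3, -0.2, -0.8])                # toy CTF values around a zero
ff = np.array([1 + 1j, 2 - 1j, -3 + 2j, 0.5 + 0j])  # toy Fourier components
flipped = ff * np.sign(c)
# amplitudes are unchanged; components past the zero are negated
assert np.allclose(np.abs(flipped), np.abs(ff))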
Example 9
def main(args):
    check_inputs(args)
    t1 = dt.now()

    ## set the device
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    log('Use cuda {}'.format(use_cuda))
    if not use_cuda:
        log('WARNING: No GPUs detected')

    log(args)
    cfg = config.overwrite_config(args.config, args)
    log('Loaded configuration:')
    pprint.pprint(cfg)

    D = cfg['lattice_args']['D']  # image size + 1
    zdim = cfg['model_args']['zdim']
    norm = cfg['dataset_args']['norm']

    if args.downsample:
        assert args.downsample % 2 == 0, "Boxsize must be even"
        assert args.downsample <= D - 1, "Must be smaller than original box size"

    model, lattice = HetOnlyVAE.load(cfg, args.weights, device=device)
    model.eval()

    ### Multiple z ###
    if args.z_start or args.zfile:

        ### Get z values
        if args.z_start:
            args.z_start = np.array(args.z_start)
            args.z_end = np.array(args.z_end)
            z = np.repeat(np.arange(args.n, dtype=np.float32), zdim).reshape(
                (args.n, zdim))
            z *= ((args.z_end - args.z_start) / (args.n - 1))
            z += args.z_start
        else:
            z = np.loadtxt(args.zfile).reshape(-1, zdim)

        if not os.path.exists(args.o):
            os.makedirs(args.o)

        log(f'Generating {len(z)} volumes')
        for i, zz in enumerate(z):
            log(zz)
            if args.downsample:
                extent = lattice.extent * (args.downsample / (D - 1))
                vol = model.decoder.eval_volume(
                    lattice.get_downsample_coords(args.downsample + 1),
                    args.downsample + 1, extent, norm, zz)
            else:
                vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                                lattice.extent, norm, zz)
            out_mrc = '{}/{}{:03d}.mrc'.format(args.o, args.prefix, i)
            if args.flip:
                vol = vol[::-1]
            if args.invert:
                vol *= -1
            mrc.write(out_mrc, vol.astype(np.float32), Apix=args.Apix)

    ### Single z ###
    else:
        z = np.array(args.z)
        log(z)
        if args.downsample:
            extent = lattice.extent * (args.downsample / (D - 1))
            vol = model.decoder.eval_volume(
                lattice.get_downsample_coords(args.downsample + 1),
                args.downsample + 1, extent, norm, z)
        else:
            vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                            lattice.extent, norm, z)
        if args.flip:
            vol = vol[::-1]
        if args.invert:
            vol *= -1
        mrc.write(args.o, vol.astype(np.float32), Apix=args.Apix)

    td = dt.now() - t1
    log('Finished in {}'.format(td))
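
The multi-z branch builds z as a per-dimension linear ramp from z_start to z_end, i.e. a linspace. A standalone equivalence check (np.linspace accepts array endpoints in numpy >= 1.16):

import numpy as np

n, zdim = 5, 2
z_start, z_end = np.array([0.0, -1.0]), np.array([2.0, 1.0])
z = np.repeat(np.arange(n, dtype=np.float32), zdim).reshape(n, zdim)
z *= (z_end - z_start) / (n - 1)
z += z_start
assert np.allclose(z, np.linspace(z_start, z_end, n))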
Example 10
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert (args.o.endswith('.mrcs') or args.o.endswith('.mrc')
            ), "Must specify output in .mrc(s) file format"

    lazy = not args.is_vol
    old = dataset.load_particles(args.mrcs,
                                 lazy=lazy,
                                 datadir=args.datadir,
                                 relion31=args.relion31)

    oldD = old[0].get().shape[0] if lazy else old.shape[-1]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    def _combine_imgs(imgs):
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(
                    cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def downsample_images(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            oldft = np.asarray(p.map(fft.ht2_center, imgs))
            newft = oldft[:, start:stop, start:stop]
            new = np.asarray(p.map(fft.iht2_center, newft))
        return new

    def downsample_in_batches(old, b):
        new = np.empty((len(old), D, D), dtype=np.float32)
        for ii in range(math.ceil(len(old) / b)):
            log(f'Processing batch {ii}')
            new[ii * b:(ii + 1) * b, :, :] = downsample_images(
                old[ii * b:(ii + 1) * b])
        return new

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(old)
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = downsample_in_batches(old, args.b)
        log(new.shape)
        log('Saving {}'.format(args.o))
        mrc.write(args.o, new.astype(np.float32), is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        nchunks = math.ceil(len(old) / args.chunk)
        out_mrcs = [
            '.{}'.format(i).join(os.path.splitext(args.o))
            for i in range(nchunks)
        ]
        chunk_names = [os.path.basename(x) for x in out_mrcs]
        for i in range(nchunks):
            log('Processing chunk {}'.format(i))
            chunk = old[i * args.chunk:(i + 1) * args.chunk]
            new = downsample_in_batches(chunk, args.b)
            log(new.shape)
            log(f'Saving {out_mrcs[i]}')
            mrc.write(out_mrcs[i], new, is_vol=False)
        # Write a text file with all chunks
        out_txt = '{}.txt'.format(os.path.splitext(args.o)[0])
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
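
The chunk filenames insert an index before the extension via os.path.splitext; for example, with a hypothetical output name:

import os

out = 'particles.256.mrcs'  # hypothetical output name
print('.{}'.format(1).join(os.path.splitext(out)))  # particles.256.1.mrcs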
Example 11
def main(args):
    assert args.o.endswith('.mrc')

    t1 = time.time()
    log(args)
    if not os.path.exists(os.path.dirname(args.o)):
        os.makedirs(os.path.dirname(args.o))

    ## set the device
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        log('WARNING: No GPUs detected')

    # load the particles
    if args.tilt is None:
        data = dataset.LazyMRCData(args.particles,
                                   norm=(0, 1),
                                   invert_data=args.invert_data,
                                   datadir=args.datadir)
        tilt = None
    else:
        data = dataset.TiltMRCData(args.particles,
                                   args.tilt,
                                   norm=(0, 1),
                                   invert_data=args.invert_data,
                                   datadir=args.datadir)
        tilt = torch.tensor(utils.xrot(args.tilt_deg).astype(np.float32))
    D = data.D
    Nimg = data.N

    lattice = Lattice(D, extent=D // 2)

    posetracker = PoseTracker.load(args.poses, Nimg, D, None, None)

    if args.ctf is not None:
        log('Loading ctf params from {}'.format(args.ctf))
        ctf_params = ctf.load_ctf_for_training(D - 1, args.ctf)
        ctf_params = torch.tensor(ctf_params)
    else:
        ctf_params = None
    Apix = ctf_params[0, 0] if ctf_params is not None else 1

    V = torch.zeros((D, D, D))
    counts = torch.zeros((D, D, D))

    mask = lattice.get_circular_mask(D // 2)

    if args.ind:
        iterator = utils.load_pkl(args.ind)
    elif args.first:
        args.first = min(args.first, Nimg)
        iterator = range(args.first)
    else:
        iterator = range(Nimg)

    for ii in iterator:
        if ii % 100 == 0: log('image {}'.format(ii))
        r, t = posetracker.get_pose(ii)
        ff = data.get(ii)
        if tilt is not None:
            ff, ff_tilt = ff  # unpack the (image, tilt image) pair
        ff = torch.tensor(ff)
        ff = ff.view(-1)[mask]
        if ctf_params is not None:
            freqs = lattice.freqs2d / ctf_params[ii, 0]
            c = ctf.compute_ctf(freqs, *ctf_params[ii, 1:]).view(-1)[mask]
            ff *= c.sign()
        if t is not None:
            ff = lattice.translate_ht(ff.view(1, -1), t.view(1, 1, 2),
                                      mask).view(-1)
        ff_coord = lattice.coords[mask] @ r
        add_slice(V, counts, ff_coord, ff, D)

        # tilt series
        if args.tilt is not None:
            ff_tilt = torch.tensor(ff_tilt)
            ff_tilt = ff_tilt.view(-1)[mask]
            if ctf_params is not None:
                ff_tilt *= c.sign()
            if t is not None:
                ff_tilt = lattice.translate_ht(ff_tilt.view(1, -1),
                                               t.view(1, 1, 2), mask).view(-1)
            ff_coord = lattice.coords[mask] @ tilt @ r
            add_slice(V, counts, ff_coord, ff_tilt, D)

    td = time.time() - t1
    log('Backprojected {} images in {}s ({}s per image)'.format(
        len(iterator), td, td / len(iterator)))
    counts[counts == 0] = 1
    V /= counts
    V = fft.ihtn_center(V[0:-1, 0:-1, 0:-1].cpu().numpy())
    mrc.write(args.o, V.astype('float32'), Apix=Apix)
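
The final normalization averages the overlapping slice contributions per voxel; setting empty counts to 1 turns 0/0 into a harmless 0/1. The accumulate-then-average pattern in miniature:

import torch

V = torch.zeros(4)
counts = torch.zeros(4)
for coord, val in [(0, 2.0), (0, 4.0), (2, 5.0)]:
    V[coord] += val
    counts[coord] += 1
counts[counts == 0] = 1  # untouched voxels divide by 1 instead of 0
V /= counts
print(V)  # tensor([3., 0., 5., 0.])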
Example 12
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert (
        args.o.endswith('.mrcs')
        or args.o.endswith('.txt')), "Must specify output in .mrcs or .txt file format"

    # load images
    lazy = args.lazy
    images = dataset.load_particles(args.mrcs,
                                    lazy=lazy,
                                    datadir=args.datadir,
                                    relion31=args.relion31)

    # filter images
    if args.ind is not None:
        log(f'Filtering image dataset with {args.ind}')
        ind = utils.load_pkl(args.ind).astype(int)
        images = [images[i] for i in ind] if lazy else images[ind]

    original_D = images[0].get().shape[0] if lazy else images.shape[-1]
    log(f'Loading {len(images)} {original_D}x{original_D} images')
    window = args.window
    invert_data = args.invert_data
    downsample = (args.D and args.D < original_D)
    if downsample:
        assert args.D <= original_D, f'New box size {args.D} cannot be larger than the original box size {original_D}'
        assert args.D % 2 == 0, 'New box size must be even'
        start = int(original_D / 2 - args.D / 2)
        stop = int(original_D / 2 + args.D / 2)
        D = args.D
        log(f'Downsampling images to {D}x{D}')
    else:
        D = original_D

    def _combine_imgs(imgs):
        ret = []
        for img in imgs:
            img.shape = (1, *img.shape)  # (D,D) -> (1,D,D)
        cur = imgs[0]
        for img in imgs[1:]:
            if img.fname == cur.fname and img.offset == cur.offset + 4 * np.prod(
                    cur.shape):
                cur.shape = (cur.shape[0] + 1, *cur.shape[1:])
            else:
                ret.append(cur)
                cur = img
        ret.append(cur)
        return ret

    def preprocess(imgs):
        if lazy:
            imgs = _combine_imgs(imgs)
            imgs = np.concatenate([i.get() for i in imgs])
        with Pool(min(args.max_threads, mp.cpu_count())) as p:
            # todo: refactor as a routine in dataset.py

            # note: applying the window before downsampling is slightly
            # different than in the original workflow
            if window:
                imgs *= dataset.window_mask(original_D, args.window_r, .99)
            ret = np.asarray(p.map(fft.ht2_center, imgs))
            if invert_data:
                ret *= -1
            if downsample:
                ret = ret[:, start:stop, start:stop]
            ret = fft.symmetrize_ht(ret)
        return ret

    def preprocess_in_batches(imgs, b):
        ret = np.empty((len(imgs), D + 1, D + 1), dtype=np.float32)
        Nbatches = math.ceil(len(imgs) / b)
        for ii in range(Nbatches):
            log(f'Processing batch of {b} images ({ii+1} of {Nbatches})')
            ret[ii * b:(ii + 1) * b, :, :] = preprocess(imgs[ii * b:(ii + 1) *
                                                             b])
        return ret

    nchunks = math.ceil(len(images) / args.chunk)
    out_mrcs = [
        f'.{i}.ft'.join(os.path.splitext(args.o)) for i in range(nchunks)
    ]
    chunk_names = [os.path.basename(x) for x in out_mrcs]
    for i in range(nchunks):
        log(f'Processing chunk {i+1} of {nchunks}')
        chunk = images[i * args.chunk:(i + 1) * args.chunk]
        new = preprocess_in_batches(chunk, args.b)
        log(f'New shape: {new.shape}')
        log(f'Saving {out_mrcs[i]}')
        mrc.write(out_mrcs[i], new, is_vol=False)

    out_txt = f'{os.path.splitext(args.o)[0]}.ft.txt'
    log(f'Saving summary txt file {out_txt}')
    with open(out_txt, 'w') as f:
        f.write('\n'.join(chunk_names))
Example 13
def analyze_volumes(outdir,
                    K,
                    dim,
                    M,
                    linkage,
                    vol_ind=None,
                    plot_dim=5,
                    particle_ind_orig=None):
    cmap = choose_cmap(M)

    # load mean volume, compute it if it does not exist
    if not os.path.exists(f'{outdir}/kmeans{K}/vol_mean.mrc'):
        volm = np.array([
            mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0]
            for i in range(K)
        ]).mean(axis=0)
        mrc.write(f'{outdir}/kmeans{K}/vol_mean.mrc', volm)
    else:
        volm = mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_mean.mrc')[0]

    # load mask
    mask = mrc.parse_mrc(f'{outdir}/mask.mrc')[0].astype(bool)
    log(f'{mask.sum()} voxels in mask')

    # load volumes
    vols = np.array([
        mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{i:03d}.mrc')[0][mask]
        for i in range(K)
    ])
    vols[vols < 0] = 0

    # load umap
    umap = utils.load_pkl(f'{outdir}/umap.pkl')
    ind = np.loadtxt(f'{outdir}/kmeans{K}/centers_ind.txt').astype(int)

    if vol_ind is not None:
        log(f'Filtering to {len(vol_ind)} volumes')
        vols = vols[vol_ind]
        ind = ind[vol_ind]

    # compute PCA
    pca = PCA(dim)
    pca.fit(vols)
    pc = pca.transform(vols)
    utils.save_pkl(pc, f'{outdir}/vol_pca_{K}.pkl')
    utils.save_pkl(pca, f'{outdir}/vol_pca_obj.pkl')
    log('Explained variance ratio:')
    log(pca.explained_variance_ratio_)

    # save rxn coordinates
    for i in range(plot_dim):
        subdir = f'{outdir}/vol_pcs/pc{i+1}'
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        min_, max_ = pc[:, i].min(), pc[:, i].max()
        log((min_, max_))
        for j, val in enumerate(np.linspace(min_, max_, 10, endpoint=True)):
            v = volm.copy()
            v[mask] += pca.components_[i] * val
            mrc.write(f'{subdir}/{j}.mrc', v)

    # plot consecutive pairs of volume PCs
    def plot(i, j):
        plt.figure()
        plt.scatter(pc[:, i], pc[:, j])
        plt.xlabel(
            f'Volume PC{i+1} (EV: {pca.explained_variance_ratio_[i]:.3f})')
        plt.ylabel(
            f'Volume PC{j+1} (EV: {pca.explained_variance_ratio_[j]:.3f})')
        plt.savefig(f'{outdir}/vol_pca_{K}_{i+1}_{j+1}.png')

    for i in range(plot_dim - 1):
        plot(i, i + 1)

    # clustering
    subdir = f'{outdir}/clustering_L2_{linkage}_{M}'
    if not os.path.exists(subdir):
        os.makedirs(subdir)
    cluster = AgglomerativeClustering(n_clusters=M,
                                      affinity='euclidean',
                                      linkage=linkage)
    labels = cluster.fit_predict(vols)
    utils.save_pkl(labels, f'{subdir}/state_labels.pkl')

    kmeans_labels = utils.load_pkl(f'{outdir}/kmeans{K}/labels.pkl')
    kmeans_counts = Counter(kmeans_labels)
    for i in range(M):
        vol_i = np.where(labels == i)[0]
        log(f'State {i}: {len(vol_i)} volumes')
        if vol_ind is not None:
            vol_i = np.arange(K)[vol_ind][vol_i]
        vol_i_all = np.array([
            mrc.parse_mrc(f'{outdir}/kmeans{K}/vol_{v:03d}.mrc')[0]
            for v in vol_i
        ])
        nparticles = np.array([kmeans_counts[v] for v in vol_i])
        vol_i_mean = np.average(vol_i_all, axis=0, weights=nparticles)
        vol_i_std = np.average((vol_i_all - vol_i_mean)**2,
                               axis=0,
                               weights=nparticles)**.5
        mrc.write(f'{subdir}/state_{i}_mean.mrc',
                  vol_i_mean.astype(np.float32))
        mrc.write(f'{subdir}/state_{i}_std.mrc', vol_i_std.astype(np.float32))
        if not os.path.exists(f'{subdir}/state_{i}'):
            os.makedirs(f'{subdir}/state_{i}')
        for v in vol_i:
            os.symlink(f'{outdir}/kmeans{K}/vol_{v:03d}.mrc',
                       f'{subdir}/state_{i}/vol_{v:03d}.mrc')
        particle_ind = analysis.get_ind_for_cluster(kmeans_labels, vol_i)
        log(f'State {i}: {len(particle_ind)} particles')
        if particle_ind_orig is not None:
            utils.save_pkl(particle_ind_orig[particle_ind],
                           f'{subdir}/state_{i}_particle_ind.pkl')
        else:
            utils.save_pkl(particle_ind,
                           f'{subdir}/state_{i}_particle_ind.pkl')

    # plot clustering results
    def hack_barplot(counts_):
        if M <= 20:  # HACK TO GET COLORS
            with sns.color_palette(cmap):
                g = sns.barplot(x=np.arange(M), y=counts_)
        else:  # default is husl
            g = sns.barplot(x=np.arange(M), y=counts_)
        return g

    plt.figure()
    counts = Counter(labels)
    g = hack_barplot([counts[i] for i in range(M)])
    for i in range(M):
        g.text(i - .1, counts[i] + 2, counts[i])
    plt.xlabel('State')
    plt.ylabel('Count')
    plt.savefig(f'{subdir}/state_volume_counts.png')

    plt.figure()
    particle_counts = [
        np.sum([kmeans_counts[ii] for ii in np.where(labels == i)[0]])
        for i in range(M)
    ]
    g = hack_barplot(particle_counts)
    for i in range(M):
        g.text(i - .1, particle_counts[i] + 2, particle_counts[i])
    plt.xlabel('State')
    plt.ylabel('Count')
    plt.savefig(f'{subdir}/state_particle_counts.png')

    def plot_w_labels(i, j):
        plt.figure()
        plt.scatter(pc[:, i], pc[:, j], c=labels, cmap=cmap)
        plt.xlabel(
            f'Volume PC{i+1} (EV: {pca.explained_variance_ratio_[i]:.3f})')
        plt.ylabel(
            f'Volume PC{j+1} (EV: {pca.explained_variance_ratio_[j]:.3f})')
        plt.savefig(f'{subdir}/vol_pca_{K}_{i+1}_{j+1}.png')

    for i in range(plot_dim - 1):
        plot_w_labels(i, i + 1)

    def plot_w_labels_annotated(i, j):
        fig, ax = plt.subplots(figsize=(16, 16))
        plt.scatter(pc[:, i], pc[:, j], c=labels, cmap=cmap)
        annots = np.arange(K)
        if vol_ind is not None:
            annots = annots[vol_ind]
        for ii, k in enumerate(annots):
            ax.annotate(str(k), pc[ii, [i, j]] + np.array([.1, .1]))
        plt.xlabel(
            f'Volume PC{i+1} (EV: {pca.explained_variance_ratio_[i]:.3f})')
        plt.ylabel(
            f'Volume PC{j+1} (EV: {pca.explained_variance_ratio_[j]:.3f})')
        plt.savefig(f'{subdir}/vol_pca_{K}_annotated_{i+1}_{j+1}.png')

    for i in range(plot_dim - 1):
        plot_w_labels_annotated(i, i + 1)

    # plot clusters on UMAP
    umap_i = umap[ind]
    fig, ax = plt.subplots(figsize=(8, 8))
    plt.scatter(umap[:, 0],
                umap[:, 1],
                alpha=.1,
                s=1,
                rasterized=True,
                color='lightgrey')
    colors = get_colors_for_cmap(cmap, M)
    for i in range(M):
        c = umap_i[np.where(labels == i)]
        plt.scatter(c[:, 0], c[:, 1], label=i, color=colors[i])
    plt.legend()
    plt.xlabel('UMAP1')
    plt.ylabel('UMAP2')
    plt.savefig(f'{subdir}/umap.png')

    fig, ax = plt.subplots(figsize=(16, 16))
    plt.scatter(umap[:, 0],
                umap[:, 1],
                alpha=.1,
                s=1,
                rasterized=True,
                color='lightgrey')
    plt.scatter(umap_i[:, 0], umap_i[:, 1], c=labels, cmap=cmap)
    annots = np.arange(K)
    if vol_ind is not None:
        annots = annots[vol_ind]
    for i, k in enumerate(annots):
        ax.annotate(str(k), umap_i[i] + np.array([.1, .1]))
    plt.xlabel('UMAP1')
    plt.ylabel('UMAP2')
    plt.savefig(f'{subdir}/umap_annotated.png')
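
The "rxn coordinates" loop above generates volumes along each principal component by adding component * value back onto the mean inside the mask. The same pattern in miniature with synthetic data:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
X = rng.standard_normal((20, 50))  # 20 "volumes", 50 masked voxels each
pca = PCA(2).fit(X)
pc = pca.transform(X)
val = pc[:, 0].max()               # traverse PC1 out to its maximum coordinate
v = pca.mean_ + pca.components_[0] * val
assert v.shape == (50,)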
Example 14
def main(args):
    log(args)
    torch.set_grad_enabled(False)
    use_cuda = torch.cuda.is_available()
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    t1 = time.time()
    ref, _ = mrc.parse_mrc(args.ref)
    log('Loaded {} volume'.format(ref.shape))
    vol, _ = mrc.parse_mrc(args.vol)
    log('Loaded {} volume'.format(vol.shape))

    projector = VolumeAligner(vol,
                              vol_ref=ref,
                              maxD=args.max_D,
                              flip=args.flip)
    if use_cuda:
        projector.use_cuda()

    r_resol = args.r_resol
    quats = so3_grid.grid_SO3(r_resol)
    q_id = np.arange(len(quats))
    q_id = np.stack([q_id // (6 * 2**r_resol), q_id % (6 * 2**r_resol)], -1)
    rots = GridPose(quats, q_id)

    t_resol = 0
    T_EXTENT = vol.shape[0] / 16 if args.t_extent is None else args.t_extent
    T_NGRID = args.t_grid
    trans = shift_grid3.base_shift_grid(T_EXTENT, T_NGRID)
    t_id = np.stack(shift_grid3.get_base_id(np.arange(len(trans)), T_NGRID),
                    -1)
    trans = GridPose(trans, t_id)

    max_keep_r = args.keep_r
    max_keep_t = args.keep_t
    #rot_tracker = MinPoseTracker(max_keep_r, 4, 2)
    #tr_tracker = MinPoseTracker(max_keep_t, 3, 3)
    for it in range(args.niter):
        log('Iteration {}'.format(it))
        log('Generating {} rotations'.format(len(rots)))
        log('Generating {} translations'.format(len(trans)))
        pose_err = np.empty((len(rots), len(trans)), dtype=np.float32)
        #rot_tracker.clear()
        #tr_tracker.clear()
        r_iterator = data.DataLoader(rots, batch_size=args.rb, shuffle=False)
        t_iterator = data.DataLoader(trans, batch_size=args.tb, shuffle=False)
        r_it = 0
        for rot, r_id in r_iterator:
            if use_cuda: rot = rot.cuda()
            vr, vi = projector.rotate(rot)
            t_it = 0
            for tr, t_id in t_iterator:
                if use_cuda: tr = tr.cuda()
                vtr, vti = projector.translate(
                    vr, vi, tr.expand(rot.size(0), *tr.shape))
                # todo: check volume
                err = projector.compute_err(vtr, vti)  # R x T
                pose_err[r_it:r_it + len(rot),
                         t_it:t_it + len(tr)] = err.cpu().numpy()
                #r_err = err.min(1)[0]
                #min_r_err, min_r_i = r_err.sort()
                #rot_tracker.add(min_r_err[:max_keep_r], rot[min_r_i][:max_keep_r], r_id[min_r_i][:max_keep_r])
                #t_err= err.min(0)[0]
                #min_t_err, min_t_i = t_err.sort()
                #tr_tracker.add(min_t_err[:max_keep_t], tr[min_t_i][:max_keep_t], t_id[min_t_i][:max_keep_t])
                t_it += len(tr)
            r_it += len(rot)

        r_err = pose_err.min(1)
        r_err_argmin = r_err.argsort()[:max_keep_r]
        t_err = pose_err.min(0)
        t_err_argmin = t_err.argsort()[:max_keep_t]

        #r = rots.pose[r_err_argmin[0]]
        #t = trans.pose[t_err_argmin[0]]
        #log('Best rot: {}'.format(r))
        #log('Best trans: {}'.format(t))
        #vr, vi = projector_full.rotate(torch.tensor(r).unsqueeze(0))
        #vr, vi = projector_full.translate(vr, vi, torch.tensor(t).view(1,1,3))
        #err = projector_full.compute_err(vr,vi)

        #w = np.where(r_err[r_err_argmin] > err.item())[0]
        rots, rots_id = subdivide_r(rots.pose[r_err_argmin],
                                    rots.pose_id[r_err_argmin], r_resol)
        rots = GridPose(rots, rots_id)

        trans, trans_id = subdivide_t(trans.pose_id[t_err_argmin], t_resol,
                                      T_EXTENT, T_NGRID)
        trans = GridPose(trans, trans_id)
        r_resol += 1
        t_resol += 1
        vlog(r_err[r_err_argmin])
        vlog(t_err[t_err_argmin])
        #log(rot_tracker.min_errs)
        #log(tr_tracker.min_errs)
    r = rots.pose[r_err_argmin[0]]
    t = trans.pose[t_err_argmin[0]] * vol.shape[0] / args.max_D
    log('Best rot: {}'.format(r))
    log('Best trans: {}'.format(t))
    t *= 2 / vol.shape[0]
    projector = VolumeAligner(vol,
                              vol_ref=ref,
                              maxD=vol.shape[0],
                              flip=args.flip)
    if use_cuda: projector.use_cuda()
    vr = projector.real_tform(
        torch.tensor(r).unsqueeze(0),
        torch.tensor(t).view(1, 1, 3))
    v = vr.squeeze().cpu().numpy()
    log('Saving {}'.format(args.o))
    mrc.write(args.o, v.astype(np.float32))

    td = time.time() - t1
    log('Finished in {}s'.format(td))
Example 15
def main(args):
    mkbasedir(args.o)
    warnexists(args.o)
    assert (args.o.endswith('.mrcs') or args.o.endswith('.mrc')
            ), "Must specify output in .mrc(s) file format"

    old = dataset.load_particles(args.mrcs, lazy=True, datadir=args.datadir)
    oldD = old[0].get().shape[0]
    assert args.D <= oldD, f'New box size {args.D} cannot be larger than the original box size {oldD}'
    assert args.D % 2 == 0, 'New box size must be even'

    D = args.D
    start = int(oldD / 2 - D / 2)
    stop = int(oldD / 2 + D / 2)

    ### Downsample volume ###
    if args.is_vol:
        oldft = fft.htn_center(np.array([x.get() for x in old]))
        log(oldft.shape)
        newft = oldft[start:stop, start:stop, start:stop]
        log(newft.shape)
        new = fft.ihtn_center(newft).astype(np.float32)
        log(f'Saving {args.o}')
        mrc.write(args.o, new, is_vol=True)

    ### Downsample images ###
    elif args.chunk is None:
        new = []
        for i in range(len(old)):
            if i % 1000 == 0:
                log(f'Processing image {i} of {len(old)}')
            img = old[i]
            oldft = fft.ht2_center(img.get()).astype(np.float32)
            newft = oldft[start:stop, start:stop]
            new.append(fft.ihtn_center(newft).astype(np.float32))
        assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2),
                                                            int(D / 2)]
        new = np.asarray(new)
        log(new.shape)
        log('Saving {}'.format(args.o))
        mrc.write(args.o, new, is_vol=False)

    ### Downsample images, saving chunks of N images ###
    else:
        chunk_names = []
        nchunks = math.ceil(len(old) / args.chunk)
        for i in range(nchunks):
            log('Processing chunk {}'.format(i))
            out_mrcs = '.{}'.format(i).join(os.path.splitext(args.o))
            new = []
            for img in old[i * args.chunk:(i + 1) * args.chunk]:
                oldft = fft.ht2_center(img.get()).astype(np.float32)
                newft = oldft[start:stop, start:stop]
                new.append(fft.ihtn_center(newft).astype(np.float32))
            assert oldft[int(oldD / 2), int(oldD / 2)] == newft[int(D / 2),
                                                                int(D / 2)]
            new = np.asarray(new)
            log(new.shape)
            log(f'Saving {out_mrcs}')
            mrc.write(out_mrcs, new, is_vol=False)
            chunk_names.append(os.path.basename(out_mrcs))
        # Write a text file with all chunks
        out_txt = '{}.txt'.format(os.path.splitext(args.o)[0])
        log(f'Saving {out_txt}')
        with open(out_txt, 'w') as f:
            f.write('\n'.join(chunk_names))
Example 16
def main(args):
    check_inputs(args)
    t1 = dt.now()

    ## set the device
    use_cuda = torch.cuda.is_available()
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    if args.config is not None:
        args = config.load_config(args.config, args)
    log(args)

    if args.downsample:
        assert args.downsample % 2 == 0, "Boxsize must be even"
        assert args.downsample < args.D, "Must be smaller than original box size"
    D = args.D + 1
    lattice = Lattice(D, extent=args.l_extent)
    if args.enc_mask:
        args.enc_mask = lattice.get_circular_mask(args.enc_mask)
        in_dim = args.enc_mask.sum()
    else:
        in_dim = lattice.D**2
    model = HetOnlyVAE(lattice,
                       args.qlayers,
                       args.qdim,
                       args.players,
                       args.pdim,
                       in_dim,
                       args.zdim,
                       encode_mode=args.encode_mode,
                       enc_mask=args.enc_mask,
                       enc_type=args.pe_type,
                       enc_dim=args.pe_dim,
                       domain=args.domain)

    log('Loading weights from {}'.format(args.weights))
    checkpoint = torch.load(args.weights)
    model.load_state_dict(checkpoint['model_state_dict'])

    model.eval()

    ### Multiple z ###
    if args.z_start or args.zfile:

        ### Get z values
        if args.z_start:
            args.z_start = np.array(args.z_start)
            args.z_end = np.array(args.z_end)
            z = np.repeat(np.arange(args.n, dtype=np.float32),
                          args.zdim).reshape((args.n, args.zdim))
            z *= ((args.z_end - args.z_start) / (args.n - 1))
            z += args.z_start
        else:
            z = np.loadtxt(args.zfile).reshape(-1, args.zdim)

        if not os.path.exists(args.o):
            os.makedirs(args.o)

        log(f'Generating {len(z)} volumes')
        for i, zz in enumerate(z):
            log(zz)
            if args.downsample:
                extent = lattice.extent * (args.downsample / args.D)
                vol = model.decoder.eval_volume(
                    lattice.get_downsample_coords(args.downsample + 1),
                    args.downsample + 1, extent, args.norm, zz)
            else:
                vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                                lattice.extent, args.norm, zz)
            out_mrc = '{}/{}{:03d}.mrc'.format(args.o, args.prefix, i)
            if args.flip:
                vol = vol[::-1]
            mrc.write(out_mrc, vol.astype(np.float32), Apix=args.Apix)

    ### Single z ###
    else:
        z = np.array(args.z)
        log(z)
        if args.downsample:
            extent = lattice.extent * (args.downsample / args.D)
            vol = model.decoder.eval_volume(
                lattice.get_downsample_coords(args.downsample + 1),
                args.downsample + 1, extent, args.norm, z)
        else:
            vol = model.decoder.eval_volume(lattice.coords, lattice.D,
                                            lattice.extent, args.norm, z)
        if args.flip:
            vol = vol[::-1]
        mrc.write(args.o, vol.astype(np.float32), Apix=args.Apix)

    td = dt.now() - t1
    log('Finished in {}'.format(td))
Example 17
def main(args):
    for out in (args.o, args.out_png, args.out_pose):
        if not out: continue
        mkbasedir(out)
        warnexists(out)

    if args.t_extent == 0.:
        log('Not shifting images')
    else:
        assert args.t_extent > 0

    if args.seed is not None:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)

    use_cuda = torch.cuda.is_available()
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    t1 = time.time()    
    vol, _ = mrc.parse_mrc(args.mrc)
    log('Loaded {} volume'.format(vol.shape))

    if args.tilt:
        theta = args.tilt * np.pi / 180
        args.tilt = np.array([[1., 0., 0.],
                              [0, np.cos(theta), -np.sin(theta)],
                              [0, np.sin(theta), np.cos(theta)]]).astype(np.float32)

    projector = Projector(vol, args.tilt)
    if use_cuda:
        projector.lattice = projector.lattice.cuda()
        projector.vol = projector.vol.cuda()

    if args.grid is not None:
        rots = GridRot(args.grid)
        log('Generating {} rotations at resolution level {}'.format(len(rots), args.grid))
    else:
        log('Generating {} random rotations'.format(args.N))
        rots = RandomRot(args.N)
    
    log('Projecting...')
    imgs = []
    iterator = data.DataLoader(rots, batch_size=args.b)
    for i, rot in enumerate(iterator):
        vlog('Projecting {}/{}'.format((i + 1) * len(rot), len(rots)))
        projections = projector.project(rot)
        projections = projections.cpu().numpy()
        imgs.append(projections)

    rots = rots.rots.cpu().numpy()
    imgs = np.vstack(imgs)
    td = time.time()-t1
    log('Projected {} images in {}s ({}s per image)'.format(len(imgs), td, td / len(imgs)))

    if args.t_extent:
        log('Shifting images between +/- {} pixels'.format(args.t_extent))
        trans = np.random.rand(len(imgs), 2) * 2 * args.t_extent - args.t_extent
        imgs = np.asarray([translate_img(img, t) for img,t in zip(imgs,trans)])
        # convention: we want the first column to be the x shift and the second column to be the y shift
        # reverse the columns, since the current implementation of translate_img uses scipy's
        # fourier_shift, which shifts in (row, col) = (y, x) order
        # convention: save the translation that centers the image
        trans = -trans[:, ::-1]
        # convert translation from pixel to fraction
        D = imgs.shape[-1]
        assert D % 2 == 0
        trans /= D

    log('Saving {}'.format(args.o))
    mrc.write(args.o,imgs.astype(np.float32))
    log('Saving {}'.format(args.out_pose))
    with open(args.out_pose,'wb') as f:
        if args.t_extent:
            pickle.dump((rots,trans),f)
        else:
            pickle.dump(rots, f)
    if args.out_png:
        log('Saving {}'.format(args.out_png))
        plot_projections(args.out_png, imgs[:9])
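
The column reversal above reflects that scipy's fourier_shift shifts along axes in (row, col) = (y, x) order. A minimal standalone check of that axis order (using scipy directly, not the repo's translate_img):

import numpy as np
from scipy.ndimage import fourier_shift

img = np.zeros((5, 5))
img[2, 2] = 1.0
# a shift of (1, 0) moves the pixel down one row (axis 0 = y), not along x
out = np.fft.ifftn(fourier_shift(np.fft.fftn(img), (1, 0))).real
assert out.argmax() == np.ravel_multi_index((3, 2), img.shape)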