Beispiel #1
0
def _project_stack_task(task):
    """Unpack a (group, dens, dest) tuple and project one particle stack.

    Defined at module level so multiprocessing can pickle it — the lambda
    previously passed to Pool.imap cannot be pickled and crashes the pool.
    NOTE(review): `dens` (an EMData) must itself be picklable for the
    parallel path to work — confirm with the EMAN2 version in use.
    """
    group, dens, dest = task
    return project_stack(group, dens, dest)


def main(args):
    """Project a density map for every particle stack listed in a .star file.

    Groups particles by their source stack and projects each group,
    optionally in parallel, printing a running progress line.
    """
    dens = EMData(args.map)
    star = parse_star(args.input, keep_index=False)
    # rlnImageName is "index@stack_path"; split it into two columns.
    star[["ImageNumber", "ImageName"]] = star['rlnImageName'].str.split("@", expand=True)
    grouped = star.groupby("ImageName")
    pool = None
    if args.nproc > 1:
        pool = Pool(processes=args.nproc)
        # Bundle per-task arguments into tuples for the picklable helper.
        results = pool.imap(
            _project_stack_task,
            ((group, dens, args.dest) for name, group in grouped))
    else:
        results = (project_stack(group, dens, args.dest) for name, group in grouped)
    i = 0
    t = 0
    for r in results:
        i += 1
        t += r
        sys.stdout.write("\rProjected %d particles in %d stacks" % (t, i))
        sys.stdout.flush()

    if pool is not None:
        pool.close()
        pool.join()

    sys.stdout.write('\n')
    sys.stdout.flush()

    return 0
Beispiel #2
0
def main(args):
    """Convert cryoSPARC metadata (.cs or legacy 0.6.5 .csv) to a Relion .star file.

    Optionally filters by class, merges micrograph coordinates from other
    .star files, rewrites micrograph paths, and applies a transform.
    Returns 0 on success, 1 on a parsing error.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    if args.input.endswith(".cs"):
        log.debug("Detected CryoSPARC 2+ .cs file")
        cs = np.load(args.input)
        try:
            df = metadata.parse_cryosparc_2_cs(cs,
                                               passthrough=args.passthrough,
                                               minphic=args.minphic)
        except (KeyError, ValueError) as e:
            # BaseException.message was removed in Python 3; e.message here
            # raised AttributeError and masked the real error. Use str(e).
            log.error(str(e))
            log.error(
                "A passthrough file may be required (check inside the cryoSPARC 2+ job directory)"
            )
            log.debug(e, exc_info=True)
            return 1
    else:
        log.debug("Detected CryoSPARC 0.6.5 .csv file")
        meta = metadata.parse_cryosparc_065_csv(
            args.input)  # Read cryosparc metadata file.
        df = metadata.cryosparc_065_csv2star(meta, args.minphic)

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.copy_micrograph_coordinates is not None:
        # Merge micrograph coordinate fields from one or more .star files.
        coord_star = pd.concat(
            (star.parse_star(inp, keep_index=False)
             for inp in glob(args.copy_micrograph_coordinates)),
            join="inner")
        star.augment_star_ucsf(coord_star)
        star.augment_star_ucsf(df)
        key = star.merge_key(df, coord_star)
        log.debug("Coordinates merge key: %s" % key)
        if args.cached or key == star.Relion.IMAGE_NAME:
            fields = star.Relion.MICROGRAPH_COORDS
        else:
            fields = star.Relion.MICROGRAPH_COORDS + [
                star.UCSF.IMAGE_INDEX, star.UCSF.IMAGE_PATH
            ]
        df = star.smart_merge(df, coord_star, fields=fields, key=key)
        star.simplify_star_ucsf(df)

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df,
                                          args.micrograph_path,
                                          inplace=True)

    if args.transform is not None:
        r = np.array(json.loads(args.transform))
        df = star.transform_star(df, r, inplace=True)

    # Write Relion .star file with correct headers.
    star.write_star(args.output, df, reindex=True)
    log.info("Output fields: %s" % ", ".join(df.columns))
    return 0
Beispiel #3
0
def main(args):
    """Convert cryoSPARC metadata (.cs with passthroughs, or legacy .csv) to .star."""
    logger = logging.getLogger('root')
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.getLevelName(args.loglevel.upper()))

    if args.input[0].endswith(".cs"):
        logger.debug("Detected CryoSPARC 2+ .cs file")
        cs = np.load(args.input[0])
        try:
            df = metadata.parse_cryosparc_2_cs(
                cs, passthroughs=args.input[1:], minphic=args.minphic,
                boxsize=args.boxsize, swapxy=args.swapxy,
                invertx=args.invertx, inverty=args.inverty)
        except (KeyError, ValueError) as err:
            logger.error(err, exc_info=True)
            logger.error("Required fields could not be mapped. Are you using the right input file(s)?")
            return 1
    else:
        logger.debug("Detected CryoSPARC 0.6.5 .csv file")
        if len(args.input) > 1:
            logger.error("Only one file at a time supported for CryoSPARC 0.6.5 .csv format")
            return 1
        # Legacy format: read the metadata file and convert it.
        meta = metadata.parse_cryosparc_065_csv(args.input[0])
        df = metadata.cryosparc_065_csv2star(meta, args.minphic)

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.copy_micrograph_coordinates is not None:
        # Merge micrograph coordinates gathered from matching .star files.
        df = star.augment_star_ucsf(df, inplace=True)
        coord_star = pd.concat(
            (star.parse_star(inp, keep_index=False, augment=True)
             for inp in glob(args.copy_micrograph_coordinates)),
            join="inner")
        key = star.merge_key(df, coord_star)
        logger.debug("Coordinates merge key: %s" % key)
        if args.cached or key == star.Relion.IMAGE_NAME:
            fields = star.Relion.MICROGRAPH_COORDS
        else:
            fields = star.Relion.MICROGRAPH_COORDS + [star.UCSF.IMAGE_INDEX, star.UCSF.IMAGE_PATH]
        df = star.smart_merge(df, coord_star, fields=fields, key=key)
        star.simplify_star_ucsf(df)

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df, args.micrograph_path, inplace=True)

    if args.transform is not None:
        xform = np.array(json.loads(args.transform))
        df = star.transform_star(df, xform, inplace=True)

    df = star.check_defaults(df, inplace=True)

    # Relion 2 output drops 3.1-only fields; Relion 3.1 output drops deprecated
    # fields and includes an optics table.
    if args.relion2:
        df = star.remove_new_relion31(df, inplace=True)
    else:
        df = star.remove_deprecated_relion2(df, inplace=True)
    star.write_star(args.output, df, resort_records=True, optics=(not args.relion2))

    logger.info("Output fields: %s" % ", ".join(df.columns))
    return 0
Beispiel #4
0
def main(args):
    """Filter a .star file by class, astigmatism, resolution, CTF FOM; subsample.

    Each filter is applied only when its argument is given. Returns 0 on
    success, 1 when a class filter cannot be satisfied.
    """
    star = parse_star(args.input, keep_index=False)

    if args.cls is not None:
        clsfields = [f for f in star.columns if "ClassNumber" in f]
        if len(clsfields) == 0:
            print("No class labels found")
            return 1
        ind = star[clsfields[0]].isin(args.cls)
        if not np.any(ind):
            print("Specified class has no members")
            return 1
        star = star.loc[ind]

    if args.max_astigmatism is not None:
        # Astigmatism is the difference between the two defocus values.
        astigmatism = star["rlnDefocusU"] - star["rlnDefocusV"]
        ind = astigmatism <= args.max_astigmatism
        star = star.loc[ind]

    if args.max_resolution is not None:
        ind = star["rlnFinalResolution"] <= args.max_resolution
        star = star.loc[ind]

    if args.min_ctf_fom is not None:
        ind = star["rlnCtfFigureOfMerit"] >= args.min_ctf_fom
        star = star.loc[ind]

    if args.subsample is not None:
        if args.subsample < 1:
            # Fractional values mean "this fraction of the remaining rows",
            # clamped to at least one row. The original
            # np.max(np.round(x), 1) passed 1 as the *axis* argument
            # (an error on a scalar); use builtin max/round instead.
            args.subsample = max(int(round(args.subsample * star.shape[0])), 1)
        # np.int was removed in NumPy 1.24; the builtin int is equivalent here.
        star = star.sample(int(args.subsample), random_state=args.seed)

    write_star(args.output, star)
    return 0
Beispiel #5
0
def main(args):
    """Filter particles in a .star file by class, CTF quality, micrograph
    occupancy, and optional subsampling.

    Each filter applies only when its argument is given. Returns 0 on
    success, 1 on an unsatisfiable filter or unimplemented option.
    """
    df = parse_star(args.input, keep_index=False)

    if args.cls is not None:
        clsfields = [f for f in df.columns if "ClassNumber" in f]
        if len(clsfields) == 0:
            print("No class labels found")
            return 1
        ind = df[clsfields[0]].isin(args.cls)
        if not np.any(ind):
            print("Specified class has no members")
            return 1
        df = df.loc[ind]

    if args.max_astigmatism is not None:
        # Astigmatism is the difference between the two defocus values.
        astigmatism = df["rlnDefocusU"] - df["rlnDefocusV"]
        ind = astigmatism <= args.max_astigmatism
        df = df.loc[ind]

    if args.max_resolution is not None:
        # Prefer the refined resolution field; fall back to the CTF estimate.
        if "rlnFinalResolution" in df.columns:
            ind = df["rlnFinalResolution"] <= args.max_resolution
        elif "rlnCtfMaxResolution" in df.columns:
            ind = df["rlnCtfMaxResolution"] <= args.max_resolution
        else:
            print("No CTF resolution field found in input")
            return 1
        df = df.loc[ind]

    if args.max_ctf_fom is not None:
        ind = df["rlnCtfFigureOfMerit"] <= args.max_ctf_fom
        df = df.loc[ind]

    if args.min_ctf_fom is not None:
        ind = df["rlnCtfFigureOfMerit"] >= args.min_ctf_fom
        df = df.loc[ind]

    if args.min_particles is not None:
        # Keep only micrographs contributing more than min_particles rows.
        counts = df["rlnMicrographName"].value_counts()
        subset = df.set_index("rlnMicrographName").loc[counts.index[counts > args.min_particles]]
        df = subset.reset_index()

    if args.subsample is not None:
        if args.subsample < 1:
            # Fractional values mean a fraction of the remaining rows,
            # clamped to at least one. The original np.max(np.round(x), 1)
            # passed 1 as the *axis* argument (an error on a scalar).
            args.subsample = max(int(round(args.subsample * df.shape[0])), 1)
        if args.bootstrap is not None:
            # Unreachable draft code for bootstrap resampling removed;
            # reinstate behind this guard when implemented.
            print("Not implemented yet")
            return 1
        # np.int was removed in NumPy 1.24; builtin int is equivalent here.
        df = df.sample(int(args.subsample), random_state=args.seed)

    write_star(args.output, df)
    return 0
Beispiel #6
0
def main(args):
    """Combine .star/.mrc/.mrcs particle inputs into one output image stack.

    Particles referenced by .star inputs are copied from their source
    stacks; raw stacks are copied slice-by-slice. If args.star is given,
    a matching .star file indexing the new stack is also written.
    Returns 0 on success, 1 on an unsupported input extension.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))
    # apix = args.apix = hdr["xlen"] / hdr["nx"]

    # Validate all extensions before writing anything.
    for fn in args.input:
        if not (fn.endswith(".star") or fn.endswith(".mrcs")
                or fn.endswith(".mrc")):
            log.error("Only .star, .mrc, and .mrcs files supported")
            return 1

    first_ptcl = 0
    dfs = []
    with mrc.ZSliceWriter(args.output) as writer:
        for fn in args.input:
            if fn.endswith(".star"):
                # Copy the referenced particles grouped by source stack so
                # each stack is opened only once.
                df = star.parse_star(fn, keep_index=False)
                star.augment_star_ucsf(df)
                df = df.sort_values([
                    star.UCSF.IMAGE_ORIGINAL_PATH,
                    star.UCSF.IMAGE_ORIGINAL_INDEX
                ])
                gb = df.groupby(star.UCSF.IMAGE_ORIGINAL_PATH)
                for name, g in gb:
                    with mrc.ZSliceReader(name) as reader:
                        for i in g[star.UCSF.IMAGE_ORIGINAL_INDEX].values:
                            writer.write(reader.read(i))
            else:
                # Raw stack: copy every z-slice and synthesize metadata.
                with mrc.ZSliceReader(fn) as reader:
                    for img in reader:
                        writer.write(img)
                    df = pd.DataFrame(
                        {star.UCSF.IMAGE_ORIGINAL_INDEX: np.arange(reader.nz)})
                df[star.UCSF.IMAGE_ORIGINAL_PATH] = fn

            if args.star is not None:
                # Point metadata at the new combined stack.
                df[star.UCSF.IMAGE_INDEX] = np.arange(first_ptcl,
                                                      first_ptcl + df.shape[0])
                df[star.UCSF.IMAGE_PATH] = writer.path
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                dfs.append(df)
            first_ptcl += df.shape[0]

    if args.star is not None:
        # Write the combined .star once, after all inputs are processed.
        # (Previously this ran inside the loop, rewriting the file after
        # every input; the final file was the same but each pass redid
        # the concat and write.)
        df = pd.concat(dfs, join="inner")
        star.write_star(args.star, df, reindex=True)

    return 0
Beispiel #7
0
def main(args):
    """Write a BILD file visualizing particle orientation coverage.

    Bins each particle's view direction onto a HEALPix grid and emits a
    colored cylinder per occupied bin, with length proportional to bin
    occupancy (the .color/.cylinder directives are Chimera BILD format).
    Returns 0 on success, 1 if no box size is given.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))
    if args.boxsize is None:
        log.error("Please specify box size")
        return 1
    df = star.parse_star(args.input, keep_index=False)
    if args.cls is not None:
        df = star.select_classes(df, args.cls)
    if args.apix is None:
        args.apix = star.calculate_apix(df)
    # HEALPix tessellation of the sphere: 12 * nside^2 equal-area pixels.
    nside = 2**args.healpix_order
    angular_sampling = np.sqrt(3 / np.pi) * 60 / nside
    theta, phi = pix2ang(nside, np.arange(12 * nside**2))
    phi = np.pi - phi
    # Cartesian unit vectors of the HEALPix bin centers.
    hp = np.column_stack((np.sin(theta) * np.cos(phi),
                          np.sin(theta) * np.sin(phi), np.cos(theta)))
    kdtree = cKDTree(hp)
    # Unit view vector per particle from its tilt/rot Euler angles.
    st = np.sin(np.deg2rad(df[star.Relion.ANGLETILT]))
    ct = np.cos(np.deg2rad(df[star.Relion.ANGLETILT]))
    sp = np.sin(np.deg2rad(df[star.Relion.ANGLEROT]))
    cp = np.cos(np.deg2rad(df[star.Relion.ANGLEROT]))
    ptcls = np.column_stack((st * cp, st * sp, ct))
    # Nearest-bin assignment and per-bin occupancy, normalized by the max.
    _, idx = kdtree.query(ptcls)
    cnts = np.bincount(idx, minlength=theta.size)
    frac = cnts / np.max(cnts).astype(np.float64)
    # Map occupancy to a clipped z-score folded into [0, 1] for coloring.
    mu = np.mean(frac)
    sigma = np.std(frac)
    color_scale = (frac - mu) / sigma
    color_scale[color_scale > 5] = 5
    color_scale[color_scale < -1] = -1
    color_scale /= 6
    color_scale += 1 / 6.
    # Cylinders start on a sphere of radius r (half box, Angstroms) and
    # extend outward in proportion to occupancy and the height scale.
    r = args.boxsize * args.apix / 2
    rp = np.reshape(r + r * frac * args.height_scale, (-1, 1))
    base1 = hp * r
    base2 = hp * rp
    base1 = base1[:, [0, 1, 2]] + np.array([r] * 3)
    base2 = base2[:, [0, 1, 2]] + np.array([r] * 3)
    height = np.squeeze(np.abs(rp - r))
    # Skip bins that are empty or nearly so.
    idx = np.where(height >= 0.01)[0]
    width = args.width_scale * np.pi * r * angular_sampling / 360
    bild = np.hstack((base1, base2, np.ones((base1.shape[0], 1)) * width))
    # One color line (red..blue ramp, no green) then one cylinder per bin.
    fmt_color = ".color %f 0 %f\n"
    fmt_cyl = ".cylinder %f %f %f %f %f %f %f\n"
    with open(args.output, "w") as f:
        for i in idx:
            f.write(fmt_color % (color_scale[i], 1 - color_scale[i]))
            f.write(fmt_cyl % tuple(bild[i]))
    return 0
Beispiel #8
0
def main(args):
    """Append all particle images from .star or stack inputs to one output file.

    Any existing output is removed first; per-particle read errors are
    reported and skipped rather than aborting the run.
    """
    if os.path.exists(args.output):
        os.remove(args.output)
    for fn in args.input:
        if fn.endswith(".star"):
            df = parse_star(fn, keep_index=False)
            for p in df["rlnImageName"]:
                # rlnImageName is "1-based-index@stack-path".
                parts = p.split("@")
                stack_path = parts[1]
                slice_idx = int(parts[0]) - 1
                try:
                    EMData(stack_path, slice_idx).append_image(args.output)
                except Exception:
                    # Best-effort: report the bad particle and continue.
                    print("Error at %s" % p)
        else:
            # Raw stack: append every image it contains.
            for i in range(EMUtil.get_image_count(fn)):
                EMData(fn, i).append_image(args.output)
    return 0
Beispiel #9
0
def main(args):
    """Score particles against a reference map by cross-correlation and
    write the top-ranked subset to a new .star file.
    """
    pyfftw.interfaces.cache.enable()

    refmap = mrc.read(args.key, compat="relion")
    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)
    refmap_ft = vop.vol_ft(refmap, threads=args.threads)

    # Fourier-space coordinate grids for the reference size.
    apix = star.calculate_apix(df)
    sz = refmap_ft.shape[0] // 2 - 1
    sx, sy = np.meshgrid(rfftfreq(sz), fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    r = s * sz
    r = np.round(r).astype(np.int64)
    r[r > sz // 2] = sz // 2 + 1
    a = np.arctan2(sy, sx)

    # CTF/shift parameters extracted for a future vectorized path;
    # currently unused here (particle_xcorr reads each row directly).
    def1 = df["rlnDefocusU"].values
    def2 = df["rlnDefocusV"].values
    angast = df["rlnDefocusAngle"].values
    phase = df["rlnPhaseShift"].values
    kv = df["rlnVoltage"].values
    ac = df["rlnAmplitudeContrast"].values
    cs = df["rlnSphericalAberration"].values
    xshift = df["rlnOriginX"].values
    yshift = df["rlnOriginY"].values

    score = np.zeros(df.shape[0])

    # TODO parallelize
    for i, (_, row) in enumerate(df.iterrows()):
        # Store each correlation. Previously the result was discarded,
        # leaving score all zeros and making the ranking below meaningless.
        score[i] = particle_xcorr(row, refmap_ft)

    if args.top is None:
        args.top = df.shape[0]

    # NOTE(review): argsort is ascending, so this keeps the LOWEST scores
    # first — confirm the intended sign convention of particle_xcorr.
    top = df.iloc[np.argsort(score)][:args.top]
    star.simplify_star_ucsf(top)
    star.write_star(args.output, top)
    return 0
Beispiel #10
0
def ReadStarFile(star_file):
    """
    Reads a star file and returns a pandas dataframe.
        NB: I need to test the different programs that it can read.
    Args:
        Star file
    Returns:
        Pandas Dataframe with simplified column names: only the first
        whitespace-separated token of each header is kept, and the "rln"
        prefix is removed.
    """
    dataframe = star.parse_star(star_file)
    # Change column names to general column names (without numbers):
    # drop everything after the first token, then strip "rln".
    dataframe.columns = [
        re.sub('rln', '', column.split()[0]) for column in dataframe.columns
    ]
    return dataframe
Beispiel #11
0
def main(args):
    """Convert cryoSPARC metadata (.cs or legacy 0.6.5 .csv) to a Relion .star file."""
    if args.input.endswith(".cs"):
        cs = np.load(args.input)
        # Without a passthrough, the .cs file itself must carry the blob path.
        if args.passthrough is None and u"blob/path" not in cs.dtype.names:
            print(
                "A passthrough file is required (found inside the cryoSPARC 2+ job directory)"
            )
            return 1
        data = metadata.parse_cryosparc_2_cs(cs,
                                             passthrough=args.passthrough,
                                             minphic=args.minphic)
    else:
        # Read cryosparc metadata file (legacy 0.6.5 .csv) and convert it.
        meta = metadata.parse_cryosparc_065_csv(args.input)
        data = metadata.cryosparc_065_csv2star(meta, args.minphic)

    if args.cls is not None:
        data = star.select_classes(data, args.cls)

    if args.copy_micrograph_coordinates is not None:
        # Merge micrograph coordinates from one or more matching .star files.
        coords = pd.concat((star.parse_star(inp, keep_index=False)
                            for inp in glob(args.copy_micrograph_coordinates)),
                           join="inner")
        data = star.smart_merge(data,
                                coords,
                                fields=star.Relion.MICROGRAPH_COORDS)

    if args.transform is not None:
        xform = np.array(json.loads(args.transform))
        data = star.transform_star(data, xform, inplace=True)

    # Write Relion .star file with correct headers.
    star.write_star(args.output, data, reindex=True)
    return 0
Beispiel #12
0
def main(args):
    """Expand particles into subparticles / symmetry copies and write .star output.

    Requires at least one of --target, --transform, --euler, or --sym;
    a target or transform additionally requires an origin (--boxsize or
    --origin). Returns 0 on success, 1 on invalid arguments.
    """
    log = logging.getLogger(__name__)
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Up-front validation of mutually dependent arguments.
    if args.target is None and args.sym is None and args.transform is None and args.euler is None:
        log.error("At least a target, transformation matrix, Euler angles, or a symmetry group must be provided")
        return 1
    elif (args.target is not None or args.transform is not None) and args.boxsize is None and args.origin is None:
        log.error("An origin must be provided via --boxsize or --origin")
        return 1

    if args.apix is None:
        # Read a single row only: enough to estimate the pixel size cheaply.
        df = star.parse_star(args.input, nrows=1)
        args.apix = star.calculate_apix(df)
        if args.apix is None:
            # Logger.warn is a deprecated alias (removed in Python 3.13);
            # use warning() throughout.
            log.warning("Could not compute pixel size, default is 1.0 Angstroms per pixel")
            args.apix = 1.0
            df[star.Relion.MAGNIFICATION] = 10000
            df[star.Relion.DETECTORPIXELSIZE] = 1.0

    if args.target is not None:
        try:
            args.target = np.array([np.double(tok) for tok in args.target.split(",")])
        except Exception:  # narrowed from bare except: don't trap SystemExit/KeyboardInterrupt
            log.error("Target must be comma-separated list of x,y,z coordinates")
            return 1

    if args.euler is not None:
        try:
            # Euler angles build a 3x4 transform; a target supplies the shift.
            args.euler = np.deg2rad(np.array([np.double(tok) for tok in args.euler.split(",")]))
            args.transform = np.zeros((3, 4))
            args.transform[:, :3] = geom.euler2rot(*args.euler)
            if args.target is not None:
                args.transform[:, -1] = args.target
        except Exception:  # narrowed from bare except
            log.error("Euler angles must be comma-separated list of rotation, tilt, skew in degrees")
            return 1

    if args.transform is not None and not hasattr(args.transform, "dtype"):
        if args.target is not None:
            log.warning("--target supersedes --transform")
        try:
            args.transform = np.array(json.loads(args.transform))
        except Exception:  # narrowed from bare except
            log.error("Transformation matrix must be in JSON/Numpy format")
            return 1

    if args.origin is not None:
        if args.boxsize is not None:
            log.warning("--origin supersedes --boxsize")
        try:
            args.origin = np.array([np.double(tok) for tok in args.origin.split(",")])
            args.origin /= args.apix  # Angstroms -> pixels
        except Exception:  # narrowed from bare except
            log.error("Origin must be comma-separated list of x,y,z coordinates")
            return 1
    elif args.boxsize is not None:
        args.origin = np.ones(3) * args.boxsize / 2

    if args.sym is not None:
        args.sym = util.relion_symmetry_group(args.sym)

    df = star.parse_star(args.input)

    if star.calculate_apix(df) != args.apix:
        log.warning("Using specified pixel size of %f instead of calculated size %f" %
                    (args.apix, star.calculate_apix(df)))

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    # Derive the final rotation r and translation d from whichever
    # specification was provided (target > transform > symmetry only).
    if args.target is not None:
        args.target /= args.apix
        c = args.target - args.origin
        c = np.where(np.abs(c) < 1, 0, c)  # Ignore very small coordinates.
        d = np.linalg.norm(c)
        ax = c / d
        r = geom.euler2rot(*np.array([np.arctan2(ax[1], ax[0]), np.arccos(ax[2]), np.deg2rad(args.psi)]))
        d = -d
    elif args.transform is not None:
        r = args.transform[:, :3]
        if args.transform.shape[1] == 4:
            d = args.transform[:, -1] / args.apix
            d = r.dot(args.origin) + d - args.origin
        else:
            d = 0
    elif args.sym is not None:
        r = np.identity(3)
        d = -args.displacement / args.apix
    else:
        log.error("At least a target or symmetry group must be provided via --target or --sym")
        return 1

    log.debug("Final rotation: %s" % str(r).replace("\n", "\n" + " " * 16))
    ops = [op.dot(r.T) for op in args.sym] if args.sym is not None else [r.T]
    log.debug("Final translation: %s (%f px)" % (str(d), np.linalg.norm(d)))
    dfs = list(subparticle_expansion(df, ops, d, rotate=args.shift_only, invert=args.invert, adjust_defocus=args.adjust_defocus))

    if args.recenter:
        for s in dfs:
            star.recenter(s, inplace=True)

    # Either interleave all expansions into one file, or write one file
    # per symmetry operator with a numbered suffix.
    if args.suffix is None and not args.skip_join:
        if len(dfs) > 1:
            df = util.interleave(dfs)
        else:
            df = dfs[0]
        df = star.compatible(df, relion2=args.relion2, inplace=True)
        star.write_star(args.output, df, optics=(not args.relion2))
    else:
        for i, s in enumerate(dfs):
            s = star.compatible(s, relion2=args.relion2, inplace=True)
            star.write_star(os.path.join(args.output, args.suffix + "_%d" % i), s, optics=(not args.relion2))
    return 0
Beispiel #13
0
def main(args):
    """Project a 3D map along each particle's orientation; optionally apply
    the CTF, subtract the projection from the original image, and/or crop.

    Writes projections to an image stack and, if args.star is given, a
    matching .star file. Returns 0 on success, 1 on invalid arguments.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))
    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)
    # Largest particle shift (px) — used to validate crop sizes below.
    maxshift = np.round(np.max(np.abs(df[star.Relion.ORIGINS].values)))

    if args.map is not None:
        if args.map.endswith(".npy"):
            # Precomputed, padded 3D FFT saved by a previous run.
            log.info("Reading precomputed 3D FFT of volume")
            f3d = np.load(args.map)
            log.info("Finished reading 3D FFT of volume")
            if args.size is None:
                args.size = (f3d.shape[0] - 3) // args.pfac
        else:
            vol = mrc.read(args.map, inc_header=False, compat="relion")
            if args.mask is not None:
                mask = mrc.read(args.mask, inc_header=False, compat="relion")
                vol *= mask
            if args.size is None:
                args.size = vol.shape[0]
            if args.crop is not None and args.size // 2 < maxshift + args.crop // 2:
                log.error(
                    "Some shifts are too large to crop (maximum crop is %d)" %
                    (args.size - 2 * maxshift))
                return 1
            log.info("Preparing 3D FFT of volume")
            f3d = vop.vol_ft(vol, pfac=args.pfac, threads=args.threads)
            log.info("Finished 3D FFT of volume")
    else:
        log.error("Please supply a map")
        return 1

    # Unpadded volume size and the effective pixel size of the projections.
    sz = (f3d.shape[0] - 3) // args.pfac
    apix = star.calculate_apix(df) * np.double(args.size) / sz
    sx, sy = np.meshgrid(np.fft.rfftfreq(sz), np.fft.fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    a = np.arctan2(sy, sx)
    log.info("Projection size is %d, unpadded volume size is %d" %
             (args.size, sz))
    log.info("Effective pixel size is %f A/px" % apix)

    if args.subtract and args.size != sz:
        log.error("Volume and projections must be same size when subtracting")
        return 1

    if args.crop is not None and args.size // 2 < maxshift + args.crop // 2:
        log.error("Some shifts are too large to crop (maximum crop is %d)" %
                  (args.size - 2 * maxshift))
        return 1

    ift = None

    with mrc.ZSliceWriter(args.output, psz=apix) as zsw:
        for i, p in df.iterrows():
            f2d = project(f3d,
                          p,
                          s,
                          sx,
                          sy,
                          a,
                          pfac=args.pfac,
                          apply_ctf=args.ctf,
                          size=args.size,
                          flip_phase=args.flip)
            if ift is None:
                # Plan the inverse FFT once; reuse it for every particle.
                ift = irfft2(f2d.copy(),
                             threads=args.threads,
                             planner_effort="FFTW_ESTIMATE",
                             auto_align_input=True,
                             auto_contiguous=True)
            proj = fftshift(
                ift(f2d.copy(),
                    np.zeros(ift.output_shape, dtype=ift.output_dtype)))
            log.debug("%f +/- %f" % (np.mean(proj), np.std(proj)))
            if args.subtract:
                with mrc.ZSliceReader(p["ucsfImagePath"]) as zsr:
                    img = zsr.read(p["ucsfImageIndex"])
                log.debug("%f +/- %f" % (np.mean(img), np.std(img)))
                proj = img - proj
            if args.crop is not None:
                # Crop a window re-centered on the particle origin.
                # np.int was removed in NumPy 1.24; use the builtin int.
                orihalf = args.size // 2
                newhalf = args.crop // 2
                x = orihalf - int(np.round(p[star.Relion.ORIGINX]))
                y = orihalf - int(np.round(p[star.Relion.ORIGINY]))
                proj = proj[y - newhalf:y + newhalf, x - newhalf:x + newhalf]
            zsw.write(proj)
            log.debug(
                "%d@%s: %d/%d" %
                (p["ucsfImageIndex"], p["ucsfImagePath"], i + 1, df.shape[0]))

    if args.star is not None:
        log.info("Writing output .star file")
        if args.crop is not None:
            df = star.recenter(df, inplace=True)
        if args.subtract:
            # Preserve the originals so subtraction can be traced back.
            df[star.UCSF.IMAGE_ORIGINAL_PATH] = df[star.UCSF.IMAGE_PATH]
            df[star.UCSF.IMAGE_ORIGINAL_INDEX] = df[star.UCSF.IMAGE_INDEX]
        df[star.UCSF.IMAGE_PATH] = args.output
        df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])
        star.simplify_star_ucsf(df)
        star.write_star(args.star, df)
    return 0
Beispiel #14
0
from pyem import star
import re
import numpy as np

star_file = 'my_star.star'

df = star.parse_star(star_file)

# Change column names to general column names (without numbers):
# keep the first whitespace-separated token, then strip the "_rln" prefix.
df.columns = [re.sub('_rln', '', column.split()[0]) for column in df.columns]

# Take an explicit copy so the assignments below modify an independent
# frame instead of a view of df (avoids pandas SettingWithCopyWarning
# and potential lost writes).
subset_df = df[['GroupNumber','DefocusV','DefocusU','DefocusAngle','AnglePsi','AngleRot','AngleTilt','OriginX','OriginY']].copy()

# Divide OriginX and OriginY by 2
subset_df['OriginX'] = subset_df['OriginX']/2
subset_df['OriginY'] = subset_df['OriginY']/2

# Index by 1 instead of 0
subset_df.index = np.arange(1, len(subset_df) + 1)

# Add a column that just says 9
subset_df.insert(0, '9', [9] * len(subset_df))
Beispiel #15
0
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from pyem.star import parse_star

# 'seaborn-white' was renamed 'seaborn-v0_8-white' in matplotlib 3.6;
# try the old name first so the script works on either version.
try:
    plt.style.use('seaborn-white')
except OSError:
    plt.style.use('seaborn-v0_8-white')

#Extract the angles data
star = parse_star('run_data.star')
xfields = [f for f in star.columns if "Rot" in f]
xfield = xfields[0]
yfields = [f for f in star.columns if "Tilt" in f]
yfield = yfields[0]
angles = star[[xfield, yfield]]
angles.columns = ["rot", "tilt"]

#Plot tilt (x) vs. rotation (y) as a scatter of small points
fig = plt.figure(figsize=(6,4))
ax = fig.add_subplot(111)
ax.plot(angles.tilt, angles.rot, linestyle='', marker='o', markersize=0.3)
plt.xlabel('Tilt (degrees)')
plt.ylabel('Rotation (degrees)')

#save plot
# NOTE(review): no savefig/show call follows — presumably the figure is
# saved elsewhere or this snippet is truncated; confirm before relying
# on any output file.
Beispiel #16
0
def main(args):
    """Filter, merge, transform, and write RELION STAR particle metadata.

    The operations selected on the command line are applied in a fixed
    pipeline order; records removed by subsampling/filtering steps are
    collected for optional output via --auxout.

    :param args: argparse.Namespace with the star.py CLI options.
    :return: 0 on success, 1 on a usage error.
    """
    if args.info:
        args.input.append(args.output)

    df = pd.concat(
        (star.parse_star(inp, augment=args.augment) for inp in args.input),
        join="inner")

    dfaux = None  # Records excluded by later steps, written via --auxout.

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.info:
        # Print a summary of the input and exit without writing anything.
        if star.is_particle_star(df) and star.Relion.CLASS in df.columns:
            c = df[star.Relion.CLASS].value_counts()
            print("%s particles in %d classes" %
                  ("{:,}".format(df.shape[0]), len(c)))
            print("    ".join([
                '%d: %s (%.2f %%)' % (i, "{:,}".format(s), 100. * s / c.sum())
                for i, s in iteritems(c.sort_index())
            ]))
        elif star.is_particle_star(df):
            print("%s particles" % "{:,}".format(df.shape[0]))
        if star.Relion.MICROGRAPH_NAME in df.columns:
            mgraphcnt = df[star.Relion.MICROGRAPH_NAME].value_counts()
            print(
                "%s micrographs, %s +/- %s particles per micrograph" %
                ("{:,}".format(len(mgraphcnt)), "{:,.3f}".format(
                    np.mean(mgraphcnt)), "{:,.3f}".format(np.std(mgraphcnt))))
        try:
            print("%f A/px (%sX magnification)" %
                  (star.calculate_apix(df), "{:,.0f}".format(
                      df[star.Relion.MAGNIFICATION][0])))
        except KeyError:
            pass
        if len(df.columns.intersection(star.Relion.ORIGINS3D)) > 0:
            print("Largest shift is %f pixels" % np.max(
                np.abs(df[df.columns.intersection(
                    star.Relion.ORIGINS3D)].values)))
        return 0

    if args.drop_angles:
        df.drop(star.Relion.ANGLES, axis=1, inplace=True, errors="ignore")

    if args.drop_containing is not None:
        containing_fields = [
            f for q in args.drop_containing for f in df.columns if q in f
        ]
        if args.invert:
            containing_fields = df.columns.difference(containing_fields)
        df.drop(containing_fields, axis=1, inplace=True, errors="ignore")

    if args.offset_group is not None:
        df[star.Relion.GROUPNUMBER] += args.offset_group

    if args.restack is not None:
        if not args.augment:
            star.augment_star_ucsf(df, inplace=True)
        star.set_original_fields(df, inplace=True)
        df[star.UCSF.IMAGE_PATH] = args.restack
        df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])

    if args.subsample_micrographs is not None:
        if args.bootstrap is not None:
            print("Only particle sampling allows bootstrapping")
            return 1
        mgraphs = df[star.Relion.MICROGRAPH_NAME].unique()
        # A fractional value means "this fraction of the micrographs".
        # np.int was removed in NumPy 1.24; use the builtin int instead.
        if args.subsample_micrographs < 1:
            args.subsample_micrographs = int(
                max(np.round(args.subsample_micrographs * len(mgraphs)), 1))
        else:
            args.subsample_micrographs = int(args.subsample_micrographs)
        ind = np.random.choice(len(mgraphs),
                               size=args.subsample_micrographs,
                               replace=False)
        mask = df[star.Relion.MICROGRAPH_NAME].isin(mgraphs[ind])
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.subsample is not None and args.suffix == "":
        # A fractional value means "this fraction of the particles".
        if args.subsample < 1:
            args.subsample = int(
                max(np.round(args.subsample * df.shape[0]), 1))
        else:
            args.subsample = int(args.subsample)
        ind = np.random.choice(df.shape[0], size=args.subsample, replace=False)
        mask = df.index.isin(ind)
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.copy_angles is not None:
        angle_star = star.parse_star(args.copy_angles, augment=args.augment)
        df = star.smart_merge(df,
                              angle_star,
                              fields=star.Relion.ANGLES,
                              key=args.merge_key)

    if args.copy_alignments is not None:
        align_star = star.parse_star(args.copy_alignments,
                                     augment=args.augment)
        df = star.smart_merge(df,
                              align_star,
                              fields=star.Relion.ALIGNMENTS,
                              key=args.merge_key)

    if args.copy_reconstruct_images is not None:
        recon_star = star.parse_star(args.copy_reconstruct_images,
                                     augment=args.augment)
        df[star.Relion.RECONSTRUCT_IMAGE_NAME] = recon_star[
            star.Relion.IMAGE_NAME]

    if args.transform is not None:
        # Either "rot,tilt,psi" Euler angles in degrees, or a JSON matrix.
        if args.transform.count(",") == 2:
            r = geom.euler2rot(
                *np.deg2rad([np.double(s) for s in args.transform.split(",")]))
        else:
            r = np.array(json.loads(args.transform))
        df = star.transform_star(df, r, inplace=True)

    if args.invert_hand:
        df = star.invert_hand(df, inplace=True)

    if args.copy_paths is not None:
        path_star = star.parse_star(args.copy_paths)
        star.set_original_fields(df, inplace=True)
        df[star.Relion.IMAGE_NAME] = path_star[star.Relion.IMAGE_NAME]

    if args.copy_ctf is not None:
        ctf_star = pd.concat((star.parse_star(inp, augment=args.augment)
                              for inp in glob.glob(args.copy_ctf)),
                             join="inner")
        df = star.smart_merge(df,
                              ctf_star,
                              star.Relion.CTF_PARAMS,
                              key=args.merge_key)

    if args.copy_micrograph_coordinates is not None:
        coord_star = pd.concat(
            (star.parse_star(inp, augment=args.augment)
             for inp in glob.glob(args.copy_micrograph_coordinates)),
            join="inner")
        df = star.smart_merge(df,
                              coord_star,
                              fields=star.Relion.MICROGRAPH_COORDS,
                              key=args.merge_key)

    if args.scale is not None:
        star.scale_coordinates(df, args.scale, inplace=True)
        star.scale_origins(df, args.scale, inplace=True)
        star.scale_magnification(df, args.scale, inplace=True)

    if args.scale_particles is not None:
        star.scale_origins(df, args.scale_particles, inplace=True)
        star.scale_magnification(df, args.scale_particles, inplace=True)

    if args.scale_coordinates is not None:
        star.scale_coordinates(df, args.scale_coordinates, inplace=True)

    if args.scale_origins is not None:
        star.scale_origins(df, args.scale_origins, inplace=True)

    if args.scale_magnification is not None:
        star.scale_magnification(df, args.scale_magnification, inplace=True)

    if args.scale_apix is not None:
        star.scale_apix(df, args.scale_apix, inplace=True)

    if args.recenter:
        df = star.recenter(df, inplace=True)

    if args.zero_origins:
        df = star.zero_origins(df, inplace=True)

    if args.pick:
        df.drop(df.columns.difference(star.Relion.PICK_PARAMS),
                axis=1,
                inplace=True,
                errors="ignore")

    if args.subsample is not None and args.suffix != "":
        if args.subsample < 1:
            print("Specific integer sample size")
            return 1
        # Floor division keeps nsamplings an int; a float size would make
        # np.random.choice raise. np.int was removed in NumPy 1.24.
        nsamplings = args.bootstrap if args.bootstrap is not None else df.shape[
            0] // int(args.subsample)
        inds = np.random.choice(df.shape[0],
                                size=(nsamplings, int(args.subsample)),
                                replace=args.bootstrap is not None)
        for i, ind in enumerate(inds):
            star.write_star(
                os.path.join(
                    args.output,
                    os.path.basename(args.input[0])[:-5] + args.suffix +
                    "_%d" % (i + 1)), df.iloc[ind])

    if args.to_micrographs:
        df = star.to_micrographs(df)

    if args.micrograph_range:
        # Keep micrographs [m, n) by sorted order; the rest go to --auxout.
        df.set_index(star.Relion.MICROGRAPH_NAME, inplace=True)
        m, n = [int(tok) for tok in args.micrograph_range.split(",")]
        mg = df.index.unique().sort_values()
        outside = list(range(0, m)) + list(range(n, len(mg)))
        dfaux = df.loc[mg[outside]].reset_index()
        df = df.loc[mg[m:n]].reset_index()

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df,
                                          args.micrograph_path,
                                          inplace=True)

    if args.min_separation is not None:
        # Drop particles closer than min_separation (Angstroms) per micrograph.
        gb = df.groupby(star.Relion.MICROGRAPH_NAME)
        dupes = []
        for n, g in gb:
            nb = algo.query_connected(
                g[star.Relion.COORDS].values - g[star.Relion.ORIGINS],
                args.min_separation / star.calculate_apix(df))
            dupes.extend(g.index[~np.isnan(nb)])
        dfaux = df.loc[dupes]
        df.drop(dupes, inplace=True)

    if args.merge_source is not None:
        if args.merge_fields is not None:
            if "," in args.merge_fields:
                args.merge_fields = args.merge_fields.split(",")
            else:
                args.merge_fields = [args.merge_fields]
        else:
            print("Merge fields must be specified using --merge-fields")
            return 1
        if args.merge_key is not None:
            if "," in args.merge_key:
                args.merge_key = args.merge_key.split(",")
        if args.by_original:
            args.by_original = star.original_field(args.merge_key)
        else:
            args.by_original = args.merge_key
        merge_star = star.parse_star(args.merge_source, augment=args.augment)
        df = star.smart_merge(df,
                              merge_star,
                              fields=args.merge_fields,
                              key=args.merge_key,
                              left_key=args.by_original)

    if args.revert_original:
        df = star.revert_original(df, inplace=True)

    if args.set_optics is not None:
        tok = args.set_optics.split(",")
        df = star.set_optics_groups(df,
                                    sep=tok[0],
                                    idx=int(tok[1]),
                                    inplace=True)
        df.dropna(axis=0, how="any", inplace=True)

    if args.drop_optics_group is not None:
        # Try matching by group number first, then by group name.
        idx = df[star.Relion.OPTICSGROUP].isin(args.drop_optics_group)
        if not np.any(idx):
            idx = df[star.Relion.OPTICSGROUPNAME].isin(args.drop_optics_group)
        if not np.any(idx):
            print("No group found to drop")
            return 1
        df = df.loc[~idx]

    if args.split_micrographs:
        dfs = star.split_micrographs(df)
        for mg in dfs:
            star.write_star(
                os.path.join(args.output,
                             os.path.basename(mg)[:-4]) + args.suffix, dfs[mg])
        return 0

    if args.auxout is not None and dfaux is not None:
        if not args.relion2:
            df = star.remove_deprecated_relion2(dfaux, inplace=True)
            star.write_star(args.output,
                            df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=True)
        else:
            df = star.remove_new_relion31(dfaux, inplace=True)
            star.write_star(args.output,
                            df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=False)

    if args.output is not None:
        if not args.relion2:  # Relion 3.1 style output.
            df = star.remove_deprecated_relion2(df, inplace=True)
            star.write_star(args.output,
                            df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=True)
        else:
            df = star.remove_new_relion31(df, inplace=True)
            star.write_star(args.output,
                            df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=False)
    return 0
Beispiel #17
0
def main(args):
    """Plot a polar histogram of particle Euler angles from a STAR file.

    Tilt is plotted against psi (--psi) or rot, either for all particles,
    one class (--cls N), or one figure per class (--cls 0).

    :param args: argparse.Namespace with the plotting CLI options.
    :return: 0 on success, 1 on a usage error.
    """
    df = parse_star(args.input)

    if args.cmap not in plt.colormaps():
        print("Colormap " + args.cmap + " is not available")
        print("Use one of: " + ", ".join(plt.colormaps()))
        return 1

    # Field names carry STAR prefixes, so match by substring.
    tilt_cols = [c for c in df.columns if "Tilt" in c]
    if not tilt_cols:
        print("No tilt angle found")
        return 1
    xfield = tilt_cols[0]

    if args.psi:
        y_cols = [c for c in df.columns if "Psi" in c]
        if not y_cols:
            print("No psi angle found")
            return 1
    else:
        y_cols = [c for c in df.columns if "Rot" in c]
        if not y_cols:
            print("No rot angle found")
            return 1
    yfield = y_cols[0]

    ylabel = "Psi Angle" if args.psi else "Rotation Angle"

    if args.cls is not None:
        cls_cols = [c for c in df.columns if "ClassNumber" in c]
        if not cls_cols:
            print("No class labels found")
            return 1
        clsfield = cls_cols[0]
        if args.cls > 0:
            # Single requested class: fall through to the common plot below.
            sel = df[clsfield] == args.cls
            if not np.any(sel):
                print("Specified class has no members")
                return 1
            data = df.loc[sel][[xfield, yfield]]
        else:
            # One figure per class, written as "<output>_classN.<format>".
            for cls in np.unique(df[clsfield]):
                subset = df.loc[df[clsfield] == cls][[xfield, yfield]]
                h, theta, r = compute_histogram(subset, args.samples)
                fig, ax, aux_ax = make_figure(h, theta, r, rmax=args.rmax,
                                              figsize=args.figsize,
                                              dpi=args.dpi, scale=args.scale,
                                              cmap=args.cmap,
                                              alpha=args.alpha)
                ax.axis["left"].label.set_text(ylabel)
                fig.savefig(args.output + "_class%d." % cls + args.format,
                            format=args.format, bbox_inches="tight",
                            dpi="figure", transparent=args.transparent)
                plt.close(fig)
            return 0
    else:
        data = df[[xfield, yfield]]

    if args.subplot is not None:
        raise NotImplementedError("Subplots are not yet supported")

    h, theta, r = compute_histogram(data, args.samples)
    fig, ax, aux_ax = make_figure(h, theta, r, rmax=args.rmax,
                                  figsize=args.figsize, dpi=args.dpi,
                                  scale=args.scale, cmap=args.cmap,
                                  alpha=args.alpha)
    ax.axis["left"].label.set_text(ylabel)
    fig.savefig(args.output, format=args.format, bbox_inches="tight",
                dpi="figure", transparent=args.transparent)
    return 0
Beispiel #18
0
def main(args):
    """Convert one or more FREALIGN-style .par files into a RELION STAR file.

    When several .par files are given (one per class), they must describe
    the same particles in the same order; each particle keeps the class
    assignment with the highest occupancy.

    :param args: argparse.Namespace with the CLI options.
    :return: 0 on success, 1 if the inputs are not aligned.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    dfs = [metadata.parse_fx_par(fn) for fn in args.input]
    n = dfs[0].shape[0]
    # Every input must have the same number of rows (same particles).
    if not np.all(np.array([df.shape[0] for df in dfs]) == n):
        log.error("Input files are not aligned!")
        return 1
    df = pd.concat(dfs, axis=0, ignore_index=True)
    # Tag each input file's rows with a 1-based class number.
    df["CLASS"] = np.repeat(np.arange(1, len(dfs) + 1), n)

    if args.min_occ:
        df = df[df["OCC"] >= args.min_occ]

    # For each particle (the "C" column), keep the row with the highest
    # occupancy, then restore the original particle order.
    df = df.sort_values(by="OCC")
    df = df.drop_duplicates("C", keep="last")
    df = df.sort_values(by="C")
    df.reset_index(inplace=True)

    if args.min_score is not None:
        # A fractional --min-score is interpreted as a percentile cutoff.
        if args.min_score < 1:
            args.min_score = np.percentile(df["SCORE"],
                                           (1 - args.min_score) * 100)
        df = df.loc[df["SCORE"] >= args.min_score]

    if args.merge is not None:
        # Take microscope parameters from the reference STAR file, convert
        # the .par table, then merge coordinate/optics fields back in keyed
        # on image path and index.
        dfo = star.parse_star(args.merge)
        args.apix = star.calculate_apix(dfo)
        args.cs = dfo.iloc[0][star.Relion.CS]
        args.ac = dfo.iloc[0][star.Relion.AC]
        args.voltage = dfo.iloc[0][star.Relion.VOLTAGE]
        df = metadata.par2star(df,
                               data_path=args.stack,
                               apix=args.apix,
                               cs=args.cs,
                               ac=args.ac,
                               kv=args.voltage,
                               invert_eulers=args.invert_eulers)
        if args.stack is None:
            # No new stack given: reuse the reference's image locations.
            df[star.UCSF.IMAGE_INDEX] = dfo[star.UCSF.IMAGE_INDEX]
            df[star.UCSF.IMAGE_PATH] = dfo[star.UCSF.IMAGE_PATH]
        key = [star.UCSF.IMAGE_INDEX, star.UCSF.IMAGE_PATH]
        fields = star.Relion.MICROGRAPH_COORDS + [
            star.UCSF.IMAGE_ORIGINAL_INDEX, star.UCSF.IMAGE_ORIGINAL_PATH
        ] + [star.Relion.OPTICSGROUP
             ] + star.Relion.OPTICSGROUPTABLE + [star.Relion.RANDOMSUBSET]
        df = star.smart_merge(df, dfo, fields=fields, key=key)
        if args.revert_original:
            df = star.revert_original(df, inplace=True)
    else:
        # No reference STAR file: microscope parameters must come from args.
        df = metadata.par2star(df,
                               data_path=args.stack,
                               apix=args.apix,
                               cs=args.cs,
                               ac=args.ac,
                               kv=args.voltage,
                               invert_eulers=args.invert_eulers)

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    df = star.check_defaults(df, inplace=True)
    df = star.compatible(df, relion2=args.relion2, inplace=True)
    star.write_star(args.output, df, optics=(not args.relion2))
    return 0
Beispiel #19
0
def main(args):
    """Filter, merge, transform, and write RELION STAR particle metadata.

    Operations selected on the command line are applied in a fixed
    pipeline order; records removed by subsampling/filtering are kept
    for optional output via --auxout.

    :param args: argparse.Namespace with the star.py CLI options.
    :return: 0 on success, 1 on a usage error.
    """
    if args.info:
        args.input.append(args.output)

    df = pd.concat(
        (star.parse_star(inp, augment=args.augment) for inp in args.input),
        join="inner")

    dfaux = None  # Records excluded by later steps, written via --auxout.

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.info:
        # Print a summary of the input and exit without writing anything.
        if star.is_particle_star(df) and star.Relion.CLASS in df.columns:
            c = df[star.Relion.CLASS].value_counts()
            print("%s particles in %d classes" %
                  ("{:,}".format(df.shape[0]), len(c)))
            # Series.iteritems() was removed in pandas 2.0; .items() is the
            # long-standing equivalent.
            print("    ".join([
                '%d: %s (%.2f %%)' % (i, "{:,}".format(s), 100. * s / c.sum())
                for i, s in c.sort_index().items()
            ]))
        elif star.is_particle_star(df):
            print("%s particles" % "{:,}".format(df.shape[0]))
        if star.Relion.MICROGRAPH_NAME in df.columns:
            mgraphcnt = df[star.Relion.MICROGRAPH_NAME].value_counts()
            print(
                "%s micrographs, %s +/- %s particles per micrograph" %
                ("{:,}".format(len(mgraphcnt)), "{:,.3f}".format(
                    np.mean(mgraphcnt)), "{:,.3f}".format(np.std(mgraphcnt))))
        try:
            print("%f A/px (%sX magnification)" %
                  (star.calculate_apix(df), "{:,.0f}".format(
                      df[star.Relion.MAGNIFICATION][0])))
        except KeyError:
            pass
        return 0

    if args.drop_angles:
        df.drop(star.Relion.ANGLES, axis=1, inplace=True, errors="ignore")

    if args.drop_containing is not None:
        containing_fields = [
            f for q in args.drop_containing for f in df.columns if q in f
        ]
        if args.invert:
            containing_fields = df.columns.difference(containing_fields)
        df.drop(containing_fields, axis=1, inplace=True, errors="ignore")

    if args.offset_group is not None:
        df[star.Relion.GROUPNUMBER] += args.offset_group

    if args.subsample_micrographs is not None:
        if args.bootstrap is not None:
            print("Only particle sampling allows bootstrapping")
            return 1
        mgraphs = df[star.Relion.MICROGRAPH_NAME].unique()
        # A fractional value means "this fraction of the micrographs".
        # np.int was removed in NumPy 1.24; use the builtin int instead.
        if args.subsample_micrographs < 1:
            args.subsample_micrographs = int(
                max(np.round(args.subsample_micrographs * len(mgraphs)), 1))
        else:
            args.subsample_micrographs = int(args.subsample_micrographs)
        ind = np.random.choice(len(mgraphs),
                               size=args.subsample_micrographs,
                               replace=False)
        mask = df[star.Relion.MICROGRAPH_NAME].isin(mgraphs[ind])
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.subsample is not None and args.suffix == "":
        # A fractional value means "this fraction of the particles".
        if args.subsample < 1:
            args.subsample = int(
                max(np.round(args.subsample * df.shape[0]), 1))
        else:
            args.subsample = int(args.subsample)
        ind = np.random.choice(df.shape[0], size=args.subsample, replace=False)
        mask = df.index.isin(ind)
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.copy_angles is not None:
        angle_star = star.parse_star(args.copy_angles, augment=args.augment)
        df = star.smart_merge(df, angle_star, fields=star.Relion.ANGLES)

    if args.transform is not None:
        # Either "rot,tilt,psi" Euler angles in degrees, or a JSON matrix.
        if args.transform.count(",") == 2:
            r = star.euler2rot(
                *np.deg2rad([np.double(s) for s in args.transform.split(",")]))
        else:
            r = np.array(json.loads(args.transform))
        df = star.transform_star(df, r, inplace=True)

    if args.invert_hand:
        # Mirror the reconstruction by negating rot and flipping tilt.
        df[star.Relion.ANGLEROT] = -df[star.Relion.ANGLEROT]
        df[star.Relion.ANGLETILT] = 180 - df[star.Relion.ANGLETILT]

    if args.copy_paths is not None:
        path_star = star.parse_star(args.copy_paths)
        df[star.Relion.IMAGE_NAME] = path_star[star.Relion.IMAGE_NAME]

    if args.copy_ctf is not None:
        ctf_star = pd.concat((star.parse_star(inp, augment=args.augment)
                              for inp in glob.glob(args.copy_ctf)),
                             join="inner")
        df = star.smart_merge(df, ctf_star, star.Relion.CTF_PARAMS)

    if args.copy_micrograph_coordinates is not None:
        coord_star = pd.concat(
            (star.parse_star(inp, augment=args.augment)
             for inp in glob.glob(args.copy_micrograph_coordinates)),
            join="inner")
        df = star.smart_merge(df,
                              coord_star,
                              fields=star.Relion.MICROGRAPH_COORDS)

    if args.scale is not None:
        star.scale_coordinates(df, args.scale, inplace=True)
        star.scale_origins(df, args.scale, inplace=True)
        star.scale_magnification(df, args.scale, inplace=True)

    if args.scale_particles is not None:
        # Fixed: this branch previously scaled by args.scale instead of
        # args.scale_particles (AttributeError/no-op if --scale was unset).
        star.scale_origins(df, args.scale_particles, inplace=True)
        star.scale_magnification(df, args.scale_particles, inplace=True)

    if args.scale_coordinates is not None:
        star.scale_coordinates(df, args.scale_coordinates, inplace=True)

    if args.scale_origins is not None:
        star.scale_origins(df, args.scale_origins, inplace=True)

    if args.scale_magnification is not None:
        # Fixed: "args.scale_magnfication" typo raised AttributeError.
        star.scale_magnification(df, args.scale_magnification, inplace=True)

    if args.recenter:
        df = star.recenter(df, inplace=True)

    if args.zero_origins:
        df = star.zero_origins(df, inplace=True)

    if args.pick:
        df.drop(df.columns.difference(star.Relion.PICK_PARAMS),
                axis=1,
                inplace=True,
                errors="ignore")

    if args.subsample is not None and args.suffix != "":
        if args.subsample < 1:
            print("Specific integer sample size")
            return 1
        # Floor division keeps nsamplings an int; a float size would make
        # np.random.choice raise. np.int was removed in NumPy 1.24.
        nsamplings = args.bootstrap if args.bootstrap is not None else df.shape[
            0] // int(args.subsample)
        inds = np.random.choice(df.shape[0],
                                size=(nsamplings, int(args.subsample)),
                                replace=args.bootstrap is not None)
        for i, ind in enumerate(inds):
            star.write_star(
                os.path.join(
                    args.output,
                    os.path.basename(args.input[0])[:-5] + args.suffix +
                    "_%d" % (i + 1)), df.iloc[ind])

    if args.to_micrographs:
        # Collapse particle rows to one row per micrograph (mean CTF/scope).
        gb = df.groupby(star.Relion.MICROGRAPH_NAME)
        mu = gb.mean()
        df = mu[[
            c for c in star.Relion.CTF_PARAMS + star.Relion.MICROSCOPE_PARAMS +
            [star.Relion.MICROGRAPH_NAME] if c in mu
        ]].reset_index()

    if args.micrograph_range:
        # Keep micrographs [m, n) by sorted order; the rest go to --auxout.
        df.set_index(star.Relion.MICROGRAPH_NAME, inplace=True)
        m, n = [int(tok) for tok in args.micrograph_range.split(",")]
        mg = df.index.unique().sort_values()
        outside = list(range(0, m)) + list(range(n, len(mg)))
        dfaux = df.loc[mg[outside]].reset_index()
        df = df.loc[mg[m:n]].reset_index()

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df,
                                          args.micrograph_path,
                                          inplace=True)

    if args.min_separation is not None:
        # Drop particles closer than min_separation (Angstroms) per micrograph.
        gb = df.groupby(star.Relion.MICROGRAPH_NAME)
        dupes = []
        for n, g in gb:
            nb = algo.query_connected(
                g[star.Relion.COORDS],
                args.min_separation / star.calculate_apix(df))
            dupes.extend(g.index[~np.isnan(nb)])
        dfaux = df.loc[dupes]
        df.drop(dupes, inplace=True)

    if args.merge_source is not None:
        if args.merge_fields is not None:
            if "," in args.merge_fields:
                args.merge_fields = args.merge_fields.split(",")
            else:
                args.merge_fields = [args.merge_fields]
        else:
            print("Merge fields must be specified using --merge-fields")
            return 1
        if args.merge_key is not None:
            if "," in args.merge_key:
                args.merge_key = args.merge_key.split(",")
        merge_star = star.parse_star(args.merge_source, augment=args.augment)
        df = star.smart_merge(df,
                              merge_star,
                              fields=args.merge_fields,
                              key=args.merge_key)

    if args.split_micrographs:
        dfs = star.split_micrographs(df)
        for mg in dfs:
            star.write_star(
                os.path.join(args.output,
                             os.path.basename(mg)[:-4]) + args.suffix, dfs[mg])
        return 0

    if args.auxout is not None and dfaux is not None:
        star.write_star(args.auxout, dfaux, simplify=args.augment)

    if args.output is not None:
        star.write_star(args.output, df, simplify=args.augment)
    return 0
Beispiel #20
0
def main(args):
    """Generate subparticle and/or symmetry-expanded STAR files.

    Subparticle positions are defined by markers (or a --target point)
    relative to an origin, and/or by a symmetry group expansion.

    :param args: argparse.Namespace with the subparticle CLI options.
    :return: 0 on success, 1 on a usage error.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)
    hdlr = logging.StreamHandler(sys.stdout)
    if args.quiet:
        hdlr.setLevel(logging.WARNING)
    else:
        hdlr.setLevel(logging.INFO)
    log.addHandler(hdlr)

    if args.markers is None and args.target is None and args.sym is None:
        log.error(
            "A marker or symmetry group must be provided via --target, --markers, or --sym"
        )
        return 1
    elif args.sym is None and args.markers is None and args.boxsize is None and args.origin is None:
        log.error(
            "An origin must be provided via --boxsize, --origin, or --markers")
        return 1
    elif args.sym is not None and args.markers is None and args.target is None and \
            (args.boxsize is not None or args.origin is not None):
        # Logger.warn is a deprecated alias; use warning().
        log.warning("Symmetry expansion alone will ignore --target or --origin")

    if args.target is not None:
        try:
            args.target = np.array(
                [np.double(tok) for tok in args.target.split(",")])
        except ValueError:  # narrowed from a bare except: only parse errors
            log.error(
                "Target must be comma-separated list of x,y,z coordinates")
            return 1

    if args.origin is not None:
        if args.boxsize is not None:
            # Fixed: previously called the undefined name `logger` (NameError).
            log.warning("--origin supersedes --boxsize")
        try:
            args.origin = np.array(
                [np.double(tok) for tok in args.origin.split(",")])
        except ValueError:
            log.error(
                "Origin must be comma-separated list of x,y,z coordinates")
            return 1

    if args.marker_sym is not None:
        args.marker_sym = relion_symmetry_group(args.marker_sym)

    star = parse_star(args.input, keep_index=False)

    if args.apix is None:
        args.apix = calculate_apix(star)
        if args.apix is None:
            # Fixed: undefined `logger` here as well.
            log.warning(
                "Could not compute pixel size, default is 1.0 Angstroms per pixel"
            )
            args.apix = 1.0

    if args.cls is not None:
        star = select_classes(star, args.cls)

    cmms = []  # Marker coordinates, converted from Angstroms to pixels.

    if args.markers is not None:
        cmmfiles = glob.glob(args.markers)
        for cmmfile in cmmfiles:
            for cmm in parse_cmm(cmmfile):
                cmms.append(cmm / args.apix)

    if args.target is not None:
        cmms.append(args.target / args.apix)

    stars = []

    if len(cmms) > 0:
        # Establish the origin: explicit, box center, or the first marker.
        if args.origin is not None:
            args.origin /= args.apix
        elif args.boxsize is not None:
            args.origin = np.ones(3) * args.boxsize / 2
        else:
            log.warning("Using first marker as origin")
            if len(cmms) == 1:
                log.error(
                    "Using first marker as origin, expected at least two markers"
                )
                return 1
            args.origin = cmms[0]
            cmms = cmms[1:]

        markers = [cmm - args.origin for cmm in cmms]

        if args.marker_sym is not None and len(markers) == 1:
            # Replicate the single marker over the marker symmetry group.
            markers = [op.dot(markers[0]) for op in args.marker_sym]
        elif args.marker_sym is not None:
            log.error(
                "Exactly one marker is required for symmetry-derived subparticles"
            )
            return 1

        rots = [euler2rot(*np.deg2rad(r[1])) for r in star[ANGLES].iterrows()]
        for m in markers:
            # Rotate so the marker direction becomes the z axis, then shift
            # the particles by the marker distance.
            d = np.linalg.norm(m)
            ax = m / d
            op = euler2rot(
                *np.array([np.arctan2(ax[1], ax[0]),
                           np.arccos(ax[2]), 0.]))
            stars.append(transform_star(star, op.T, -d, rots=rots))

    if args.sym is not None:
        args.sym = relion_symmetry_group(args.sym)
        if len(stars) > 0:
            stars = [
                se for se in subparticle_expansion(
                    s, args.sym, -args.displacement / args.apix) for s in stars
            ]
        else:
            stars = list(
                subparticle_expansion(star, args.sym,
                                      -args.displacement / args.apix))

    if args.recenter:
        for s in stars:
            recenter(s, inplace=True)

    if args.suffix is None and not args.skip_join:
        if len(stars) > 1:
            star = interleave(stars)
        else:
            star = stars[0]
        write_star(args.output, star)
    else:
        for i, star in enumerate(stars):
            write_star(os.path.join(args.output, args.suffix + "_%d" % i),
                       star)
    return 0
Beispiel #21
0
def FileFind(atomnum, starDF, subii=0):
    """Resolve the particle image file name and in-stack index for one particle.

    Fix: the original body ignored both parameters and read module globals
    (``star``, ``AtomID[ptcli]``); it now uses ``starDF`` and ``atomnum``.

    :param atomnum: Row index into starDF for the particle of interest.
    :param starDF: pandas DataFrame parsed from a RELION .star file.
    :param subii: Fallback in-stack index used when rlnImageName is missing.
    :return: Tuple (mrcfilename, ii_ndx) of stack path and particle index.
    """
    if not args.missing_rlnimgname:
        # rlnImageName has the form "NNNNNN@path/to/stack.mrcs".
        ptcl_ii_name = starDF.iloc[atomnum].rlnImageName
        ii_ndx = int(ptcl_ii_name[:ptcl_ii_name.find('@')])
        # Keep only the final path component and prepend the particle path.
        fn = ptcl_ii_name[ptcl_ii_name.find('/'):]
        fn = fn[fn.rfind('/'):]
        mrcfilename = args.particle_path + fn
    else:
        # No rlnImageName available: use the explicitly supplied stack.
        mrcfilename = args.particle_path + args.submrcs
        ii_ndx = subii
    return mrcfilename, ii_ndx


# MPI-style setup: only rank 0 loads metadata and imaging constants; the
# other ranks hold placeholders (presumably filled in by a later broadcast
# that is outside this snippet -- TODO confirm).
if rank == 0:
    star = s.parse_star(args.star_file)  # Parse the RELION .star file into a DataFrame.
    if (args.savestar != ''):
        star.to_csv(args.savestar)  # Optionally dump the metadata as CSV.
    if (args.particle_amount == -1):
        args.particle_amount = len(star)  # -1 means "use every particle".
    reftest = args.mrc_reference
    lam = simim.wavelength(300.0)  # Electron wavelength at 300 kV (units per simim).
    C_s = 2.7 * 1e7  #C_s given in mm, convert to angstrom
    astigmatism = 0
    amplitude_contrast = 0.1

else:
    # NOTE(review): astigmatism and amplitude_contrast are not initialized
    # on non-root ranks -- confirm they are broadcast or unused there.
    star = None
    reftest = None
    lam = None
    C_s = None
Beispiel #22
0
def load_star(filename):
    """Parse a STAR file and return it as a pyem (pandas) data frame."""
    return star.parse_star(filename)
Beispiel #23
0
def main(args):
    """
    Project a 3D map along each particle's orientation from a .star file,
    writing the projections (or projection-subtracted particles) to a stack.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on error)
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))
    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)
    if args.map is not None:
        vol = mrc.read(args.map, inc_header=False, compat="relion")
        if args.mask is not None:
            # Apply a real-space mask to the map before projecting.
            mask = mrc.read(args.mask, inc_header=False, compat="relion")
            vol *= mask
    else:
        # Fix: report via the logger (was a bare print) for consistency
        # with the rest of this program's diagnostics.
        log.error("Please supply a map")
        return 1

    # Precompute the padded 3D FFT of the volume and the 2D frequency grids
    # used for central-slice extraction and CTF evaluation.
    f3d = vop.vol_ft(vol, pfac=args.pfac, threads=args.threads)
    sz = f3d.shape[0] // 2 - 1
    sx, sy = np.meshgrid(np.fft.rfftfreq(sz), np.fft.fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    a = np.arctan2(sy, sx)

    ift = None  # Lazily-planned inverse FFT, reused for every particle.

    with mrc.ZSliceWriter(args.output) as zsw:
        for i, p in df.iterrows():
            # Extract the 2D Fourier slice for this particle's orientation.
            f2d = project(f3d,
                          p,
                          s,
                          sx,
                          sy,
                          a,
                          apply_ctf=args.ctf,
                          size=args.size)
            if ift is None:
                ift = irfft2(f2d.copy(),
                             threads=cpu_count(),
                             planner_effort="FFTW_ESTIMATE",
                             auto_align_input=True,
                             auto_contiguous=True)
            proj = fftshift(
                ift(f2d.copy(), np.zeros(vol.shape[:-1], dtype=vol.dtype)))
            log.debug("%f +/- %f" % (np.mean(proj), np.std(proj)))
            if args.subtract:
                # Subtract the projection from the original particle image.
                with mrc.ZSliceReader(p["ucsfImagePath"]) as zsr:
                    img = zsr.read(p["ucsfImageIndex"])
                log.debug("%f +/- %f" % (np.mean(img), np.std(img)))
                proj = img - proj
            zsw.write(proj)
            log.info(
                "%d@%s: %d/%d" %
                (p["ucsfImageIndex"], p["ucsfImagePath"], i + 1, df.shape[0]))

    if args.star is not None:
        if args.subtract:
            # Preserve provenance of the original particle images.
            df[star.UCSF.IMAGE_ORIGINAL_PATH] = df[star.UCSF.IMAGE_PATH]
            df[star.UCSF.IMAGE_ORIGINAL_INDEX] = df[star.UCSF.IMAGE_INDEX]
        df[star.UCSF.IMAGE_PATH] = args.output
        df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])
        star.simplify_star_ucsf(df)
        star.write_star(args.star, df)
    return 0
def main(args):
    """
    Projection subtraction program entry point.

    Subtracts reference projections of a map from each particle image,
    writing new per-micrograph .mrcs stacks and an updated .star file.
    Work is pipelined with one producer/consumer thread pair per output
    stack, backed by a multiprocessing pool.

    NOTE(review): this variant uses Python 2-only APIs (`dict.iteritems`,
    `Queue.Queue`, `Semaphore._Semaphore__value`) -- confirm the target
    interpreter before porting.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    log.debug("Reading particle .star file")
    df = parse_star(args.input, keep_index=False)
    df.reset_index(inplace=True)  # Keep original row order in an "index" column.
    # Split "NNNNNN@path" image names into a numeric index and a stack path.
    df["rlnImageOriginalName"] = df["rlnImageName"]
    df["ucsfOriginalParticleIndex"], df["ucsfOriginalImagePath"] = \
        df["rlnImageOriginalName"].str.split("@").str
    df["ucsfOriginalParticleIndex"] = pd.to_numeric(
        df["ucsfOriginalParticleIndex"])
    # Stable sort keeps particles from the same stack contiguous.
    df.sort_values("rlnImageOriginalName", inplace=True, kind="mergesort")
    gb = df.groupby("ucsfOriginalImagePath")
    # 1-based position of each particle within its new output stack.
    df["ucsfParticleIndex"] = gb.cumcount() + 1
    df["ucsfImagePath"] = df["ucsfOriginalImagePath"].map(
        lambda x: os.path.join(
            args.dest, args.prefix + os.path.basename(x).replace(
                ".mrcs", args.suffix + ".mrcs")))
    df["rlnImageName"] = df["ucsfParticleIndex"].map(
        lambda x: "%.6d" % x).str.cat(df["ucsfImagePath"], sep="@")
    log.debug("Read particle .star file")

    # 3D FFT of the map to subtract: compute it, or load a precomputed one.
    if args.submap_ft is None:
        submap = mrc.read(args.submap, inc_header=False, compat="relion")
        submap_ft = vol_ft(submap, threads=min(args.threads, cpu_count()))
    else:
        log.debug("Loading %s" % args.submap_ft)
        submap_ft = np.load(args.submap_ft)
        log.debug("Loaded %s" % args.submap_ft)

    # 2D frequency grids matching the padded volume FT.
    sz = submap_ft.shape[0] // 2 - 1
    sx, sy = np.meshgrid(np.fft.rfftfreq(sz), np.fft.fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    r = s * sz
    r = np.round(r).astype(np.int64)
    r[r > sz // 2] = sz // 2 + 1  # Clamp out-of-band frequencies to one bin.
    nr = np.max(r) + 1
    a = np.arctan2(sy, sx)

    # Optional reference map enables per-frequency scaling (coefs_method=1).
    if args.refmap is not None:
        coefs_method = 1
        if args.refmap_ft is None:
            refmap = mrc.read(args.refmap, inc_header=False, compat="relion")
            refmap_ft = vol_ft(refmap, threads=min(args.threads, cpu_count()))
        else:
            log.debug("Loading %s" % args.refmap_ft)
            refmap_ft = np.load(args.refmap_ft)
            log.debug("Loaded %s" % args.refmap_ft)
    else:
        coefs_method = 0
        refmap_ft = np.empty(submap_ft.shape, dtype=submap_ft.dtype)
    apix = calculate_apix(df)

    log.debug("Constructing particle metadata references")
    # npart = df.shape[0]
    # Flat numpy views of per-particle metadata, handed to worker processes.
    idx = df["ucsfOriginalParticleIndex"].values
    stack = df["ucsfOriginalImagePath"].values.astype(np.str, copy=False)
    def1 = df["rlnDefocusU"].values
    def2 = df["rlnDefocusV"].values
    angast = df["rlnDefocusAngle"].values
    phase = df["rlnPhaseShift"].values
    kv = df["rlnVoltage"].values
    ac = df["rlnAmplitudeContrast"].values
    cs = df["rlnSphericalAberration"].values
    az = df["rlnAngleRot"].values
    el = df["rlnAngleTilt"].values
    sk = df["rlnAnglePsi"].values
    xshift = df["rlnOriginX"].values
    yshift = df["rlnOriginY"].values
    new_idx = df["ucsfParticleIndex"].values
    new_stack = df["ucsfImagePath"].values.astype(np.str, copy=False)

    log.debug("Grouping particles by output stack")
    gb = df.groupby("ucsfImagePath")

    # Bound the number of simultaneously active producer/consumer pairs.
    iothreads = threading.BoundedSemaphore(args.io_thread_pairs)
    qsize = args.io_queue_length
    fftthreads = args.fft_threads
    # pyfftw.interfaces.cache.enable()

    log.debug("Instantiating worker pool")
    pool = Pool(processes=args.threads)
    threads = []

    try:
        # One producer/consumer thread pair per output stack.
        for fname, particles in gb.indices.iteritems():
            log.debug("Instantiating queue")
            queue = Queue.Queue(maxsize=qsize)
            log.debug("Create producer for %s" % fname)
            prod = threading.Thread(
                target=producer,
                args=(pool, queue, submap_ft, refmap_ft, fname, particles, idx,
                      stack, sx, sy, s, a, apix, def1, def2, angast, phase, kv,
                      ac, cs, az, el, sk, xshift, yshift, new_idx, new_stack,
                      coefs_method, r, nr, fftthreads))
            log.debug("Create consumer for %s" % fname)
            cons = threading.Thread(target=consumer,
                                    args=(queue, fname, apix, fftthreads,
                                          iothreads))
            threads.append((prod, cons))
            iothreads.acquire()  # Released by the consumer when its stack is done.
            log.debug("iotheads at %d" % iothreads._Semaphore__value)
            log.debug("Start consumer for %s" % fname)
            cons.start()
            log.debug("Start producer for %s" % fname)
            prod.start()
    except KeyboardInterrupt:
        log.debug("Main thread wants out!")

    # Join all pipeline threads; joining a never-started thread raises
    # RuntimeError, which is expected after a KeyboardInterrupt above.
    for pair in threads:
        for thread in pair:
            try:
                thread.join()
            except RuntimeError as e:
                log.debug(e)

    pool.close()
    pool.join()
    pool.terminate()

    # Drop internal bookkeeping columns before writing the output .star file.
    df.drop([c for c in df.columns if "ucsf" in c or "eman" in c],
            axis=1,
            inplace=True)

    # Restore the original particle order saved by reset_index above.
    df.set_index("index", inplace=True)
    df.sort_index(inplace=True, kind="mergesort")

    write_star(args.output, df, reindex=True)

    return 0
Beispiel #25
0
def main(args):
    """
    Projection subtraction program entry point.

    Subtracts reference projections of a (optionally masked) map from each
    particle image, writing new per-micrograph .mrcs stacks and an updated
    .star file. Work is pipelined with one producer/consumer thread pair
    per output stack, backed by a multiprocessing pool.

    NOTE(review): uses `Queue.Queue` and `Semaphore._Semaphore__value`
    (Python 2 spellings) -- confirm the target interpreter.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Default output naming: "<stem>_subtracted.mrcs" alongside the input.
    if args.dest is None and args.suffix == "":
        args.dest = ""
        args.suffix = "_subtracted"

    log.info("Reading particle .star file")
    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)
    if not args.original:
        # Preserve provenance of the input particle images.
        df[star.UCSF.IMAGE_ORIGINAL_PATH] = df[star.UCSF.IMAGE_PATH]
        df[star.UCSF.IMAGE_ORIGINAL_INDEX] = df[star.UCSF.IMAGE_INDEX]
    # Stable sort keeps particles from the same stack contiguous.
    df.sort_values(star.UCSF.IMAGE_ORIGINAL_PATH,
                   inplace=True,
                   kind="mergesort")
    gb = df.groupby(star.UCSF.IMAGE_ORIGINAL_PATH)
    df[star.UCSF.IMAGE_INDEX] = gb.cumcount()  # 0-based index in output stack.
    df[star.UCSF.IMAGE_PATH] = df[star.UCSF.IMAGE_ORIGINAL_PATH].map(
        lambda x: os.path.join(
            args.dest, args.prefix + os.path.basename(x).replace(
                ".mrcs", args.suffix + ".mrcs")))

    # 3D FFT of the map to subtract: compute it (after optional masking),
    # or load a precomputed one from disk.
    if args.submap_ft is None:
        log.info("Reading volume")
        submap = mrc.read(args.submap, inc_header=False, compat="relion")
        if args.submask is not None:
            log.info("Masking volume")
            submask = mrc.read(args.submask, inc_header=False, compat="relion")
            submap *= submask
        log.info("Preparing 3D FFT of volume")
        submap_ft = vop.vol_ft(submap,
                               pfac=args.pfac,
                               threads=min(args.threads, cpu_count()))
        log.info("Finished 3D FFT of volume")
    else:
        log.info("Loading 3D FFT from %s" % args.submap_ft)
        submap_ft = np.load(args.submap_ft)
        log.info("Loaded 3D FFT from %s" % args.submap_ft)

    # Recover the original box size from the padded FT dimensions.
    sz = (submap_ft.shape[0] - 3) // args.pfac

    # Cropping must leave room for the largest recorded origin shift.
    maxshift = np.round(np.max(np.abs(df[star.Relion.ORIGINS].values)))
    if args.crop is not None and sz < 2 * maxshift + args.crop:
        log.error("Some shifts are too large to crop (maximum crop is %d)" %
                  (sz - 2 * maxshift))
        return 1

    # 2D frequency grids matching the box size.
    sx, sy = np.meshgrid(np.fft.rfftfreq(sz), np.fft.fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    r = s * sz
    r = np.round(r).astype(np.int64)
    r[r > sz // 2] = sz // 2 + 1  # Clamp out-of-band frequencies to one bin.
    nr = np.max(r) + 1
    a = np.arctan2(sy, sx)

    # Optional reference map enables per-frequency scaling (coefs_method=1).
    if args.refmap is not None:
        coefs_method = 1
        if args.refmap_ft is None:
            refmap = mrc.read(args.refmap, inc_header=False, compat="relion")
            refmap_ft = vop.vol_ft(refmap,
                                   pfac=args.pfac,
                                   threads=min(args.threads, cpu_count()))
        else:
            log.info("Loading 3D FFT from %s" % args.refmap_ft)
            refmap_ft = np.load(args.refmap_ft)
            log.info("Loaded 3D FFT from %s" % args.refmap_ft)
    else:
        coefs_method = 0
        refmap_ft = np.empty(submap_ft.shape, dtype=submap_ft.dtype)

    apix = star.calculate_apix(df)
    log.info("Computed pixel size is %f A" % apix)

    log.debug("Grouping particles by output stack")
    gb = df.groupby(star.UCSF.IMAGE_PATH)

    # Bound the number of simultaneously active producer/consumer pairs.
    iothreads = threading.BoundedSemaphore(args.io_thread_pairs)
    qsize = args.io_queue_length
    fftthreads = args.fft_threads

    def init():
        # Give each pool worker its own thread-local storage.
        global tls
        tls = threading.local()

    log.info("Instantiating thread pool with %d workers" % args.threads)
    pool = Pool(processes=args.threads, initializer=init)
    threads = []

    log.info("Performing projection subtraction")

    try:
        # One producer/consumer thread pair per output stack.
        for fname, particles in gb:
            log.debug("Instantiating queue")
            queue = Queue.Queue(maxsize=qsize)
            log.debug("Create producer for %s" % fname)
            prod = threading.Thread(target=producer,
                                    args=(pool, queue, submap_ft, refmap_ft,
                                          fname, particles, sx, sy, s, a, apix,
                                          coefs_method, r, nr, fftthreads,
                                          args.crop, args.pfac))
            log.debug("Create consumer for %s" % fname)
            cons = threading.Thread(target=consumer,
                                    args=(queue, fname, apix, iothreads))
            threads.append((prod, cons))
            iothreads.acquire()  # Released by the consumer when its stack is done.
            log.debug("iotheads at %d" % iothreads._Semaphore__value)
            log.debug("Start consumer for %s" % fname)
            cons.start()
            log.debug("Start producer for %s" % fname)
            prod.start()
    except KeyboardInterrupt:
        log.debug("Main thread wants out!")

    # Join all pipeline threads; joining a never-started thread raises
    # RuntimeError, which is expected after a KeyboardInterrupt above.
    for pair in threads:
        for thread in pair:
            try:
                thread.join()
            except RuntimeError as e:
                log.debug(e)

    pool.close()
    pool.join()
    pool.terminate()

    log.info("Finished projection subtraction")

    log.info("Writing output .star file")
    if args.crop is not None:
        df = star.recenter(df, inplace=True)
    star.simplify_star_ucsf(df)
    star.write_star(args.output, df)

    return 0
Beispiel #26
0
def main(args):
    """
    Expand particles into target-centered or symmetry-related subparticles.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on error)
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)
    hdlr = logging.StreamHandler(sys.stdout)
    if args.quiet:
        hdlr.setLevel(logging.WARNING)
    else:
        hdlr.setLevel(logging.INFO)
    log.addHandler(hdlr)

    if args.target is None and args.sym is None:
        log.error(
            "At least a target or symmetry group must be provided via --target or --sym"
        )
        return 1
    elif args.target is not None and args.boxsize is None and args.origin is None:
        log.error("An origin must be provided via --boxsize or --origin")
        return 1

    if args.target is not None:
        try:
            args.target = np.array(
                [np.double(tok) for tok in args.target.split(",")])
        except ValueError:  # Narrowed from a bare except: only bad tokens.
            log.error(
                "Target must be comma-separated list of x,y,z coordinates")
            return 1

    if args.origin is not None:
        if args.boxsize is not None:
            # Logger.warn is deprecated; use warning().
            log.warning("--origin supersedes --boxsize")
        try:
            args.origin = np.array(
                [np.double(tok) for tok in args.origin.split(",")])
        except ValueError:
            log.error(
                "Origin must be comma-separated list of x,y,z coordinates")
            return 1

    if args.sym is not None:
        args.sym = util.relion_symmetry_group(args.sym)

    df = star.parse_star(args.input)

    if args.apix is None:
        args.apix = star.calculate_apix(df)
        if args.apix is None:
            log.warning(
                "Could not compute pixel size, default is 1.0 Angstroms per pixel"
            )
            args.apix = 1.0
            df[star.Relion.MAGNIFICATION] = 10000
            # Fix: use the Relion namespace, consistent with MAGNIFICATION.
            df[star.Relion.DETECTORPIXELSIZE] = 1.0

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.target is not None:
        # Convert the origin and target from Angstroms to pixels.
        if args.origin is not None:
            args.origin /= args.apix
        elif args.boxsize is not None:
            args.origin = np.ones(3) * args.boxsize / 2
        args.target /= args.apix
        c = args.target - args.origin
        c = np.where(np.abs(c) < 1, 0, c)  # Ignore very small coordinates.
        d = np.linalg.norm(c)
        ax = c / d
        # Rotation taking the z axis onto the target direction, with the
        # requested in-plane (psi) angle.
        cm = util.euler2rot(*np.array(
            [np.arctan2(ax[1], ax[0]),
             np.arccos(ax[2]),
             np.deg2rad(args.psi)]))
        ops = [op.dot(cm) for op in args.sym] if args.sym is not None else [cm]
        dfs = [
            star.transform_star(df,
                                op.T,
                                -d,
                                rotate=args.shift_only,
                                invert=args.target_invert,
                                adjust_defocus=args.adjust_defocus)
            for op in ops
        ]
    elif args.sym is not None:
        dfs = list(
            subparticle_expansion(df, args.sym,
                                  -args.displacement / args.apix))
    else:
        log.error(
            "At least a target or symmetry group must be provided via --target or --sym"
        )
        return 1

    if args.recenter:
        for s in dfs:
            star.recenter(s, inplace=True)

    if args.suffix is None and not args.skip_join:
        # Single output: interleave all expansions into one .star file.
        if len(dfs) > 1:
            df = util.interleave(dfs)
        else:
            df = dfs[0]
        star.write_star(args.output, df)
    else:
        # One numbered output file per expansion.
        for i, s in enumerate(dfs):
            star.write_star(os.path.join(args.output, args.suffix + "_%d" % i),
                            s)
    return 0
Beispiel #27
0
def main(args):
    """
    Analyze residual rotations between two particle alignments.

    Computes unit-quaternion residuals between matched rows of two .star
    files, embeds a random subsample (and the held-out remainder) with
    classical multidimensional scaling, and writes key-frame volume series
    along the first two principal coordinates.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on error)
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Cap native-library threading at the requested thread count.
    os.environ["OMP_NUM_THREADS"] = str(args.threads)
    os.environ["OPENBLAS_NUM_THREADS"] = str(args.threads)
    os.environ["MKL_NUM_THREADS"] = str(args.threads)
    os.environ["NUMBA_NUM_THREADS"] = str(args.threads)

    outdir = os.path.dirname(args.output)
    outbase = os.path.basename(args.output)

    dfs = [star.parse_star(inp, keep_index=False) for inp in args.input]
    # All inputs must describe the same particles, row for row.
    size_err = np.array(
        args.input)[np.where(~np.equal([df.shape[0]
                                        for df in dfs[1:]], dfs[0].shape[0]))]
    if len(size_err) > 0:
        log.error(
            "All files must have same number of particles. Offending files:\n%s"
            % ", ".join(size_err))
        return 1

    dfo = dfs[0]
    dfn = dfs[1]

    # Residual rotation per particle: q_res = conj(q_old) * q_new.
    oq = geom.e2q_vec(np.deg2rad(dfo[star.Relion.ANGLES].values))
    nq = geom.e2q_vec(np.deg2rad(dfn[star.Relion.ANGLES].values))
    oqu = geom.normq(oq)
    nqu = geom.normq(nq)
    resq = geom.qtimes(geom.qconj(oqu), nqu)
    mu = geom.meanq(resq)
    resqu = geom.normq(resq, mu)

    # Sample whole multimers. Fix: integer division -- np.random.choice
    # requires integer arguments, and "/" yields floats under Python 3.
    si_mult = np.random.choice(resqu.shape[0] // args.multimer,
                               args.sample // args.multimer,
                               replace=False)
    si = np.array([
        si_mult[i] * args.multimer + k for i in range(si_mult.shape[0])
        for k in range(args.multimer)
    ])
    # Fix: np.int was removed in NumPy 1.24; the builtin int is equivalent.
    not_si = np.setdiff1d(np.arange(resqu.shape[0], dtype=int), si)

    samp = resqu[si, :].copy()

    t = time.time()
    d = geom.pdistq(samp,
                    np.zeros((samp.shape[0], samp.shape[0]), dtype=np.double))
    log.info("Sample pairwise distances calculated in %0.3f s" %
             (time.time() - t))

    g = geom.double_center(d, inplace=False)

    t = time.time()
    vals, vecs = np.linalg.eigh(g)
    log.info("Sample Gram matrix decomposed in %0.3f s" % (time.time() - t))

    np.save(args.output + "_evals.npy", vals)
    np.save(args.output + "_evecs.npy", vecs)

    # Classical MDS embedding from the top three eigenpairs (eigh returns
    # eigenvalues in ascending order, so the last columns are the largest).
    x = vecs[:, [-1, -2, -3]].dot(np.diag(np.sqrt(vals[[-1, -2, -3]])))

    np.save(args.output + "_xtrain.npy", x)

    test = resqu[not_si].copy()

    t = time.time()
    ga = geom.cdistq(test, samp,
                     np.zeros((test.shape[0], samp.shape[0]), dtype=np.single))
    log.info("Test pairwise distances calculated in %0.3f s" %
             (time.time() - t))

    ga = geom.double_center(ga, reference=d, inplace=True)

    # Out-of-sample (Nystroem-style) projection of the held-out residuals.
    xa = ga.dot(x) / vals[[-1, -2, -3]].reshape(1, 3)

    np.save(args.output + "_xtest.npy", xa)

    vol, hdr = mrc.read(args.volume, inc_header=True)
    psz = hdr["xlen"] / hdr["nx"]
    for pc in range(2):
        keyq = geom.findkeyq(test,
                             xa,
                             nkey=10,
                             pc_cyl_ptile=args.outlier_radius,
                             pc_ptile=args.outlier_length,
                             pc=pc)
        keyq_exp = geom.qslerp_mult_balanced(keyq, 10)
        # Fix: strip the ".mrc" suffix properly. rstrip(".mrc") removes any
        # trailing 'm'/'r'/'c'/'.' characters, corrupting some file names.
        volname = os.path.basename(args.volume)
        if volname.endswith(".mrc"):
            volname = volname[:-len(".mrc")]
        volbase = volname + "_kpc%d" % pc + "_%.4d.mrc"
        util.write_q_series(vol,
                            keyq_exp,
                            os.path.join(outdir, volbase),
                            psz=psz,
                            order=args.spline_order)

    return 0
Beispiel #28
0
def main(args):
    """
    Convert cryoSPARC (0.6.x) metadata to a RELION .star file.

    NOTE(review): relies on pandas APIs that were later deprecated/removed
    (`DataFrame.lookup`, `apply(..., broadcast=True)`) -- confirm the pinned
    pandas version before upgrading.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on error)
    """
    meta = parse_metadata(args.input)  # Read cryosparc metadata file.
    meta["data_input_idx"] = [
        "%.6d" % (i + 1) for i in meta["data_input_idx"]
    ]  # Reformat particle idx for Relion.

    if "data_input_relpath" not in meta.columns:
        if args.data_path is None:
            print(
                "Data path missing, use --data-path to specify particle stack path"
            )
            return 1
        meta["data_input_relpath"] = args.data_path

    meta["data_input_relpath"] = meta["data_input_idx"].str.cat(
        meta["data_input_relpath"], sep="@")  # Construct rlnImageName field.
    # Take care of trivial mappings.
    rlnheaders = [
        general[h] for h in meta.columns
        if h in general and general[h] is not None
    ]
    df = meta[[
        h for h in meta.columns if h in general and general[h] is not None
    ]].copy()
    df.columns = rlnheaders

    if "rlnRandomSubset" in df.columns:
        # cryoSPARC stores half-set as a letter ('A'/'B'); Relion wants 1/2.
        df["rlnRandomSubset"] = df["rlnRandomSubset"].apply(
            lambda x: ord(x) - 64)

    if "rlnPhaseShift" in df.columns:
        # cryoSPARC phase shifts are in radians; Relion expects degrees.
        df["rlnPhaseShift"] = np.rad2deg(df["rlnPhaseShift"])

    # Class assignments and other model parameters.
    phic = meta[[h for h in meta.columns if "phiC" in h
                 ]]  # Posterior probability over class assignments.
    if len(phic.columns) > 0:  # Check class assignments exist in input.
        # phic.columns = [int(h[21]) for h in meta.columns if "phiC" in h]
        phic.columns = range(len(phic.columns))
        cls = phic.idxmax(axis=1)  # Hard assignment = most probable class.
        for p in model:
            if model[p] is not None:
                # Pull each per-class model parameter for the assigned class.
                pspec = p.split("model")[1]
                param = meta[[h for h in meta.columns if pspec in h]]
                if len(param.columns) > 0:
                    param.columns = phic.columns
                    df[model[p]] = param.lookup(param.index, cls)
        df["rlnClassNumber"] = cls + 1  # Add one for Relion indexing.
    else:
        # Single-class job: copy the model parameters directly.
        for p in model:
            if model[p] is not None and p in meta.columns:
                df[model[p]] = meta[p]
        df["rlnClassNumber"] = 1

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    # Convert axis-angle representation to Euler angles (degrees).
    if df.columns.intersection(star.Relion.ANGLES).size == len(
            star.Relion.ANGLES):
        df[star.Relion.ANGLES] = np.rad2deg(df[star.Relion.ANGLES].apply(
            lambda x: rot2euler(expmap(x)), axis=1, raw=True, broadcast=True))

    if args.minphic is not None:
        # Particles whose best class probability is below the threshold.
        mask = np.all(phic < args.minphic, axis=1)
        if args.keep_bad:
            df.loc[mask, "rlnClassNumber"] = 0
        else:
            df.drop(df[mask].index, inplace=True)

    if args.copy_micrograph_coordinates is not None:
        # Merge micrograph coordinates in from matching .star files.
        coord_star = pd.concat(
            (star.parse_star(inp, keep_index=False)
             for inp in glob(args.copy_micrograph_coordinates)),
            join="inner")
        df = star.smart_merge(df,
                              coord_star,
                              fields=star.Relion.MICROGRAPH_COORDS)

    if args.transform is not None:
        # Apply a user-supplied rotation matrix (JSON) to the alignments.
        r = np.array(json.loads(args.transform))
        df = star.transform_star(df, r, inplace=True)

    # Write Relion .star file with correct headers.
    star.write_star(args.output, df, reindex=True)
    return 0
Beispiel #29
0
def main(args):
    """
    Merge particle images and metadata from .star/.mrcs/.mrc/.par inputs
    into a single particle stack, optionally writing a combined .star file.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on error)
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))
    # apix = args.apix = hdr["xlen"] / hdr["nx"]

    # Validate every input up front so we fail before writing anything.
    for fn in args.input:
        if not (fn.endswith(".star") or fn.endswith(".mrcs") or
                fn.endswith(".mrc") or fn.endswith(".par")):
            log.error("Only .star, .mrc, .mrcs, and .par files supported")
            return 1

    first_ptcl = 0
    dfs = []
    with mrc.ZSliceWriter(args.output) as writer:
        for fn in args.input:
            if fn.endswith(".star"):
                df = star.parse_star(fn, augment=True)
                if args.cls is not None:
                    df = star.select_classes(df, args.cls)
                star.set_original_fields(df, inplace=True)
                if args.resort:
                    df = df.sort_values([star.UCSF.IMAGE_ORIGINAL_PATH,
                                         star.UCSF.IMAGE_ORIGINAL_INDEX])
                # Copy each referenced particle image into the output stack.
                for idx, row in df.iterrows():
                    if args.stack_path is not None:
                        input_stack_path = os.path.join(args.stack_path, row[star.UCSF.IMAGE_ORIGINAL_PATH])
                    else:
                        input_stack_path = row[star.UCSF.IMAGE_ORIGINAL_PATH]
                    with mrc.ZSliceReader(input_stack_path) as reader:
                        i = row[star.UCSF.IMAGE_ORIGINAL_INDEX]
                        writer.write(reader.read(i))
            elif fn.endswith(".par"):
                if args.stack_path is None:
                    log.error(".par file input requires --stack-path")
                    return 1
                df = metadata.par2star(metadata.parse_fx_par(fn), data_path=args.stack_path)
                # star.set_original_fields(df, inplace=True)  # Redundant.
                star.augment_star_ucsf(df)
            elif fn.endswith(".csv"):
                # Fix: explain the failure instead of exiting silently.
                # (Currently unreachable: the validation loop above rejects
                # .csv inputs; kept as a guard.)
                log.error(".csv input is not supported")
                return 1
            elif fn.endswith(".cs"):
                # Fix: explain the failure instead of exiting silently.
                log.error(".cs input is not supported")
                return 1
            else:
                if fn.endswith(".mrcs"):
                    # Raw stack: copy all images; synthesize minimal metadata.
                    with mrc.ZSliceReader(fn) as reader:
                        for img in reader:
                            writer.write(img)
                        df = pd.DataFrame(
                            {star.UCSF.IMAGE_ORIGINAL_INDEX: np.arange(reader.nz)})
                    df[star.UCSF.IMAGE_ORIGINAL_PATH] = fn
                else:
                    # Fix: report via the logger (was a bare print) for
                    # consistency with the other diagnostics.
                    log.error("Unrecognized input file type")
                    return 1
            if args.star is not None:
                # Point the metadata at the merged output stack.
                df[star.UCSF.IMAGE_INDEX] = np.arange(first_ptcl,
                                                      first_ptcl + df.shape[0])
                if args.abs_path:
                    df[star.UCSF.IMAGE_PATH] = writer.path
                else:
                    df[star.UCSF.IMAGE_PATH] = os.path.relpath(writer.path, os.path.dirname(args.star))
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                dfs.append(df)
            first_ptcl += df.shape[0]

    if args.star is not None:
        df = pd.concat(dfs, join="inner")
        # df = pd.concat(dfs)
        # df = df.dropna(df, axis=1, how="any")
        if not args.relion2:  # Relion 3.1 style output.
            df = star.remove_deprecated_relion2(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=True)
        else:
            df = star.remove_new_relion31(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=False)
    return 0