def main(args):
    """
    Convert cryoSPARC particle metadata to a Relion .star file.

    An input ending in ``.cs`` is loaded with ``np.load`` and parsed as cryoSPARC 2+ metadata; any other
    input is parsed as a cryoSPARC 0.6.5 ``.csv`` file. Optional class selection, micrograph coordinate
    merging, micrograph path replacement and a JSON-encoded transformation are then applied, in that
    order, before the table is written to ``args.output``.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 if the .cs metadata could not be parsed)
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    if args.input.endswith(".cs"):
        log.debug("Detected CryoSPARC 2+ .cs file")
        cs = np.load(args.input)
        try:
            df = metadata.parse_cryosparc_2_cs(cs, passthrough=args.passthrough, minphic=args.minphic)
        except (KeyError, ValueError) as e:
            # Log the exception object itself. Exceptions have no ``.message`` attribute on Python 3,
            # so reading it raised AttributeError and masked the actual parsing failure.
            log.error(e)
            log.error("A passthrough file may be required (check inside the cryoSPARC 2+ job directory)")
            log.debug(e, exc_info=True)
            return 1
    else:
        log.debug("Detected CryoSPARC 0.6.5 .csv file")
        meta = metadata.parse_cryosparc_065_csv(args.input)  # Read cryosparc metadata file.
        df = metadata.cryosparc_065_csv2star(meta, args.minphic)

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.copy_micrograph_coordinates is not None:
        # The option is a glob pattern; every matching .star file contributes rows, and only the
        # columns common to all of them are kept (join="inner").
        coord_star = pd.concat(
            (star.parse_star(inp, keep_index=False) for inp in glob(args.copy_micrograph_coordinates)),
            join="inner")
        star.augment_star_ucsf(coord_star)
        star.augment_star_ucsf(df)
        key = star.merge_key(df, coord_star)
        log.debug("Coordinates merge key: %s" % key)
        if args.cached or key == star.Relion.IMAGE_NAME:
            fields = star.Relion.MICROGRAPH_COORDS
        else:
            fields = star.Relion.MICROGRAPH_COORDS + [star.UCSF.IMAGE_INDEX, star.UCSF.IMAGE_PATH]
        df = star.smart_merge(df, coord_star, fields=fields, key=key)
        star.simplify_star_ucsf(df)

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df, args.micrograph_path, inplace=True)

    if args.transform is not None:
        # The transform is supplied as a JSON array on the command line.
        r = np.array(json.loads(args.transform))
        df = star.transform_star(df, r, inplace=True)

    # Write Relion .star file with correct headers.
    star.write_star(args.output, df, reindex=True)
    log.info("Output fields: %s" % ", ".join(df.columns))
    return 0
def main(args):
    """
    Convert cryoSPARC particle metadata (plus optional passthrough files) to a Relion .star file.

    The first entry of ``args.input`` selects the format: ``.cs`` is parsed as cryoSPARC 2+ metadata with
    the remaining entries given as passthrough files; anything else is parsed as a single cryoSPARC
    0.6.5 ``.csv`` file. The output is written in Relion 2 style when ``args.relion2`` is set, otherwise
    with an optics table.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on unusable input)
    """
    logger = logging.getLogger('root')
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.getLevelName(args.loglevel.upper()))

    primary = args.input[0]
    if primary.endswith(".cs"):
        logger.debug("Detected CryoSPARC 2+ .cs file")
        cs = np.load(primary)
        try:
            df = metadata.parse_cryosparc_2_cs(
                cs,
                passthroughs=args.input[1:],
                minphic=args.minphic,
                boxsize=args.boxsize,
                swapxy=args.swapxy,
                invertx=args.invertx,
                inverty=args.inverty)
        except (KeyError, ValueError) as err:
            logger.error(err, exc_info=True)
            logger.error("Required fields could not be mapped. Are you using the right input file(s)?")
            return 1
    else:
        logger.debug("Detected CryoSPARC 0.6.5 .csv file")
        if len(args.input) > 1:
            logger.error("Only one file at a time supported for CryoSPARC 0.6.5 .csv format")
            return 1
        # Read cryosparc metadata file.
        meta = metadata.parse_cryosparc_065_csv(primary)
        df = metadata.cryosparc_065_csv2star(meta, args.minphic)

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.copy_micrograph_coordinates is not None:
        df = star.augment_star_ucsf(df, inplace=True)
        # Every file matching the glob pattern contributes coordinates; keep only shared columns.
        coord_tables = [star.parse_star(path, keep_index=False, augment=True)
                        for path in glob(args.copy_micrograph_coordinates)]
        coord_star = pd.concat(coord_tables, join="inner")
        key = star.merge_key(df, coord_star)
        logger.debug("Coordinates merge key: %s" % key)
        fields = star.Relion.MICROGRAPH_COORDS
        if not (args.cached or key == star.Relion.IMAGE_NAME):
            # Build a new list; the shared constant must not be extended in place.
            fields = fields + [star.UCSF.IMAGE_INDEX, star.UCSF.IMAGE_PATH]
        df = star.smart_merge(df, coord_star, fields=fields, key=key)
        star.simplify_star_ucsf(df)

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df, args.micrograph_path, inplace=True)

    if args.transform is not None:
        rotation = np.array(json.loads(args.transform))
        df = star.transform_star(df, rotation, inplace=True)

    df = star.check_defaults(df, inplace=True)

    # Relion 2 output drops the newer fields and the optics table; otherwise drop deprecated fields.
    legacy = args.relion2
    if legacy:
        df = star.remove_new_relion31(df, inplace=True)
    else:
        df = star.remove_deprecated_relion2(df, inplace=True)
    star.write_star(args.output, df, resort_records=True, optics=not legacy)

    logger.info("Output fields: %s" % ", ".join(df.columns))
    return 0
def main(args):
    """
    Concatenate particle images from .star, .mrc and .mrcs inputs into one output stack.

    For a .star input the referenced particles are read from their original stacks, grouped by stack
    and ordered by original index. Any other accepted input is copied slice by slice. When
    ``args.star`` is given, a .star file describing the new stack is written as well.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 if an input has an unsupported extension)
    """
    logger = logging.getLogger('root')
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Validate every input up front so the output stack is not created for a bad command line.
    accepted = (".star", ".mrcs", ".mrc")
    if not all(path.endswith(accepted) for path in args.input):
        logger.error("Only .star, .mrc, and .mrcs files supported")
        return 1

    offset = 0  # Index in the output stack of the first particle from the current input.
    tables = []
    with mrc.ZSliceWriter(args.output) as writer:
        for path in args.input:
            if path.endswith(".star"):
                df = star.parse_star(path, keep_index=False)
                star.augment_star_ucsf(df)
                df = df.sort_values([star.UCSF.IMAGE_ORIGINAL_PATH, star.UCSF.IMAGE_ORIGINAL_INDEX])
                # Open each source stack once and pull its particles in sorted order.
                for stack_name, members in df.groupby(star.UCSF.IMAGE_ORIGINAL_PATH):
                    with mrc.ZSliceReader(stack_name) as reader:
                        for idx in members[star.UCSF.IMAGE_ORIGINAL_INDEX].values:
                            writer.write(reader.read(idx))
            else:
                with mrc.ZSliceReader(path) as reader:
                    for image in reader:
                        writer.write(image)
                    df = pd.DataFrame({star.UCSF.IMAGE_ORIGINAL_INDEX: np.arange(reader.nz)})
                df[star.UCSF.IMAGE_ORIGINAL_PATH] = path

            if args.star is not None:
                df[star.UCSF.IMAGE_INDEX] = np.arange(offset, offset + df.shape[0])
                df[star.UCSF.IMAGE_PATH] = writer.path
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                tables.append(df)
            offset += df.shape[0]

    if args.star is None:
        return 0

    df = pd.concat(tables, join="inner")
    star.write_star(args.star, df, reindex=True)
    return 0
def main(args):
    """
    Rank particles by their cross-correlation against a reference map and write the selection.

    The reference map named by ``args.key`` is Fourier transformed once, ``particle_xcorr`` is evaluated
    for every particle row of ``args.input``, and the first ``args.top`` particles in ascending score
    order are written to ``args.output``. If ``args.top`` is None it is set to the number of particles.

    Previously the value returned by ``particle_xcorr`` was discarded, so the score array stayed all
    zeros and the ordering carried no information; the score is now stored per particle. Locals that
    were computed but never used (frequency grids, per-column CTF/shift arrays, pixel size) are removed.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (always 0)
    """
    pyfftw.interfaces.cache.enable()
    refmap = mrc.read(args.key, compat="relion")
    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)
    refmap_ft = vop.vol_ft(refmap, threads=args.threads)

    score = np.zeros(df.shape[0])
    # TODO parallelize
    # Enumerate positionally so the score array lines up with df.iloc regardless of the index labels.
    for pos, (_, row) in enumerate(df.iterrows()):
        # NOTE(review): assumes particle_xcorr returns a scalar score - confirm against its definition.
        score[pos] = particle_xcorr(row, refmap_ft)

    if args.top is None:
        args.top = df.shape[0]
    # NOTE(review): argsort is ascending, so the lowest scores are selected - confirm this is the
    # intended direction for "top".
    top = df.iloc[np.argsort(score)][:args.top]
    star.simplify_star_ucsf(top)
    star.write_star(args.output, top)
    return 0
def main(args):
    """
    Project a volume along the orientation of every particle in a .star file.

    ``args.map`` is either a precomputed 3D FFT (``.npy``) or a volume that is optionally masked and
    then Fourier transformed. One projection per particle is written to ``args.output``; with
    ``args.subtract`` the projection is subtracted from the particle image instead, and with
    ``args.crop`` the result is cropped around the particle origin. When ``args.star`` is given, a
    .star file pointing at the new stack is written.

    ``args.size`` is filled in from the map when it is None.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on missing map, oversized shifts or size mismatch)
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)
    # Largest absolute origin shift over all particles, used to validate the crop size.
    maxshift = np.round(np.max(np.abs(df[star.Relion.ORIGINS].values)))

    if args.map is not None:
        if args.map.endswith(".npy"):
            log.info("Reading precomputed 3D FFT of volume")
            f3d = np.load(args.map)
            log.info("Finished reading 3D FFT of volume")
            if args.size is None:
                args.size = (f3d.shape[0] - 3) // args.pfac
        else:
            vol = mrc.read(args.map, inc_header=False, compat="relion")
            if args.mask is not None:
                mask = mrc.read(args.mask, inc_header=False, compat="relion")
                vol *= mask
            if args.size is None:
                args.size = vol.shape[0]
            # Checked here as well as below so a bad crop fails before the 3D FFT is computed.
            if args.crop is not None and args.size // 2 < maxshift + args.crop // 2:
                log.error("Some shifts are too large to crop (maximum crop is %d)" % (args.size - 2 * maxshift))
                return 1
            log.info("Preparing 3D FFT of volume")
            f3d = vop.vol_ft(vol, pfac=args.pfac, threads=args.threads)
            log.info("Finished 3D FFT of volume")
    else:
        log.error("Please supply a map")
        return 1

    sz = (f3d.shape[0] - 3) // args.pfac
    apix = star.calculate_apix(df) * np.double(args.size) / sz
    sx, sy = np.meshgrid(np.fft.rfftfreq(sz), np.fft.fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    a = np.arctan2(sy, sx)
    log.info("Projection size is %d, unpadded volume size is %d" % (args.size, sz))
    log.info("Effective pixel size is %f A/px" % apix)

    if args.subtract and args.size != sz:
        log.error("Volume and projections must be same size when subtracting")
        return 1

    if args.crop is not None and args.size // 2 < maxshift + args.crop // 2:
        log.error("Some shifts are too large to crop (maximum crop is %d)" % (args.size - 2 * maxshift))
        return 1

    # The inverse FFT plan is created from the first projection and reused for all later ones.
    ift = None

    with mrc.ZSliceWriter(args.output, psz=apix) as zsw:
        for i, p in df.iterrows():
            f2d = project(f3d, p, s, sx, sy, a, pfac=args.pfac, apply_ctf=args.ctf, size=args.size,
                          flip_phase=args.flip)
            if ift is None:
                ift = irfft2(f2d.copy(), threads=args.threads, planner_effort="FFTW_ESTIMATE",
                             auto_align_input=True, auto_contiguous=True)
            proj = fftshift(ift(f2d.copy(), np.zeros(ift.output_shape, dtype=ift.output_dtype)))
            log.debug("%f +/- %f" % (np.mean(proj), np.std(proj)))
            if args.subtract:
                with mrc.ZSliceReader(p["ucsfImagePath"]) as zsr:
                    img = zsr.read(p["ucsfImageIndex"])
                log.debug("%f +/- %f" % (np.mean(img), np.std(img)))
                proj = img - proj
            if args.crop is not None:
                orihalf = args.size // 2
                newhalf = args.crop // 2
                # Builtin int replaces the np.int alias, which was removed in NumPy 1.24.
                x = orihalf - int(np.round(p[star.Relion.ORIGINX]))
                y = orihalf - int(np.round(p[star.Relion.ORIGINY]))
                proj = proj[y - newhalf:y + newhalf, x - newhalf:x + newhalf]
            zsw.write(proj)
            log.debug("%d@%s: %d/%d" % (p["ucsfImageIndex"], p["ucsfImagePath"], i + 1, df.shape[0]))

    if args.star is not None:
        log.info("Writing output .star file")
        if args.crop is not None:
            df = star.recenter(df, inplace=True)
        if args.subtract:
            # Remember where the unsubtracted particles came from before repointing at the new stack.
            df[star.UCSF.IMAGE_ORIGINAL_PATH] = df[star.UCSF.IMAGE_PATH]
            df[star.UCSF.IMAGE_ORIGINAL_INDEX] = df[star.UCSF.IMAGE_INDEX]
        df[star.UCSF.IMAGE_PATH] = args.output
        df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])
        star.simplify_star_ucsf(df)
        star.write_star(args.star, df)
    return 0
def main(args):
    """
    Projection subtraction program entry point.

    Reads the particle .star file, assigns each particle an output stack path and index, prepares the
    3D FFT of the map to subtract (and optionally of a reference map), then starts one producer and one
    consumer thread per output stack. After all threads have been joined and the worker pool shut down,
    the updated .star file is written to ``args.output``.

    ``args.dest`` and ``args.suffix`` are modified in place when both are left at None / "".

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 if the requested crop does not fit the particle shifts)
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # With neither a destination nor a suffix given, fall back to a "_subtracted" suffix.
    # NOTE(review): if args.dest is None but args.suffix is non-empty, args.dest stays None and is
    # passed to os.path.join below - confirm the argument parser prevents that combination.
    if args.dest is None and args.suffix == "":
        args.dest = ""
        args.suffix = "_subtracted"

    log.info("Reading particle .star file")
    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)
    if not args.original:
        df[star.UCSF.IMAGE_ORIGINAL_PATH] = df[star.UCSF.IMAGE_PATH]
        df[star.UCSF.IMAGE_ORIGINAL_INDEX] = df[star.UCSF.IMAGE_INDEX]
    # Stable sort by source stack, then number the particles 0..n-1 within each source stack.
    df.sort_values(star.UCSF.IMAGE_ORIGINAL_PATH, inplace=True, kind="mergesort")
    gb = df.groupby(star.UCSF.IMAGE_ORIGINAL_PATH)
    df[star.UCSF.IMAGE_INDEX] = gb.cumcount()
    # Output stack name: dest / (prefix + source basename with ".mrcs" replaced by suffix + ".mrcs").
    df[star.UCSF.IMAGE_PATH] = df[star.UCSF.IMAGE_ORIGINAL_PATH].map(
        lambda x: os.path.join(
            args.dest,
            args.prefix + os.path.basename(x).replace(
                ".mrcs", args.suffix + ".mrcs")))

    if args.submap_ft is None:
        log.info("Reading volume")
        submap = mrc.read(args.submap, inc_header=False, compat="relion")
        if args.submask is not None:
            log.info("Masking volume")
            submask = mrc.read(args.submask, inc_header=False, compat="relion")
            submap *= submask
        log.info("Preparing 3D FFT of volume")
        submap_ft = vop.vol_ft(submap, pfac=args.pfac, threads=min(args.threads, cpu_count()))
        log.info("Finished 3D FFT of volume")
    else:
        log.info("Loading 3D FFT from %s" % args.submap_ft)
        submap_ft = np.load(args.submap_ft)
        log.info("Loaded 3D FFT from %s" % args.submap_ft)

    # NOTE(review): presumably recovers the unpadded box size from the padded FFT - confirm against vop.vol_ft.
    sz = (submap_ft.shape[0] - 3) // args.pfac

    # Largest absolute origin shift over all particles, used to validate the crop size.
    maxshift = np.round(np.max(np.abs(df[star.Relion.ORIGINS].values)))
    if args.crop is not None and sz < 2 * maxshift + args.crop:
        log.error("Some shifts are too large to crop (maximum crop is %d)" % (sz - 2 * maxshift))
        return 1

    # Frequency grids for a real-input 2D FFT of size sz: magnitude s, integer radius r, angle a.
    sx, sy = np.meshgrid(np.fft.rfftfreq(sz), np.fft.fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    r = s * sz
    r = np.round(r).astype(np.int64)
    # Every radius beyond sz // 2 is collapsed into one extra bin.
    r[r > sz // 2] = sz // 2 + 1
    nr = np.max(r) + 1
    a = np.arctan2(sy, sx)

    if args.refmap is not None:
        coefs_method = 1
        if args.refmap_ft is None:
            refmap = mrc.read(args.refmap, inc_header=False, compat="relion")
            refmap_ft = vop.vol_ft(refmap, pfac=args.pfac, threads=min(args.threads, cpu_count()))
        else:
            log.info("Loading 3D FFT from %s" % args.refmap_ft)
            refmap_ft = np.load(args.refmap_ft)
            log.info("Loaded 3D FFT from %s" % args.refmap_ft)
    else:
        coefs_method = 0
        # No reference map: pass an uninitialized array of matching shape and dtype.
        refmap_ft = np.empty(submap_ft.shape, dtype=submap_ft.dtype)

    apix = star.calculate_apix(df)
    log.info("Computed pixel size is %f A" % apix)

    log.debug("Grouping particles by output stack")
    gb = df.groupby(star.UCSF.IMAGE_PATH)

    # The semaphore caps how many producer/consumer pairs may be started; it is acquired below once
    # per pair. NOTE(review): presumably released by the consumer when it finishes - verify there.
    iothreads = threading.BoundedSemaphore(args.io_thread_pairs)
    qsize = args.io_queue_length
    fftthreads = args.fft_threads

    def init():
        # Pool initializer: creates the module-level thread-local storage object.
        global tls
        tls = threading.local()

    log.info("Instantiating thread pool with %d workers" % args.threads)
    pool = Pool(processes=args.threads, initializer=init)
    threads = []

    log.info("Performing projection subtraction")

    try:
        for fname, particles in gb:
            log.debug("Instantiating queue")
            queue = Queue.Queue(maxsize=qsize)
            log.debug("Create producer for %s" % fname)
            prod = threading.Thread(
                target=producer,
                args=(pool, queue, submap_ft, refmap_ft, fname, particles,
                      sx, sy, s, a, apix, coefs_method, r, nr, fftthreads, args.crop, args.pfac))
            log.debug("Create consumer for %s" % fname)
            cons = threading.Thread(
                target=consumer,
                args=(queue, fname, apix, iothreads))
            threads.append((prod, cons))
            # Blocks here once args.io_thread_pairs pairs hold the semaphore.
            iothreads.acquire()
            # NOTE(review): _Semaphore__value is a name-mangled private attribute and, like
            # Queue.Queue above, looks Python 2 specific - confirm the target interpreter.
            log.debug("iotheads at %d" % iothreads._Semaphore__value)
            log.debug("Start consumer for %s" % fname)
            cons.start()
            log.debug("Start producer for %s" % fname)
            prod.start()
    except KeyboardInterrupt:
        log.debug("Main thread wants out!")

    # Join every pair; joining a thread that was never started raises RuntimeError, which is only logged.
    for pair in threads:
        for thread in pair:
            try:
                thread.join()
            except RuntimeError as e:
                log.debug(e)

    pool.close()
    pool.join()
    pool.terminate()

    log.info("Finished projection subtraction")

    log.info("Writing output .star file")
    if args.crop is not None:
        df = star.recenter(df, inplace=True)
    star.simplify_star_ucsf(df)
    star.write_star(args.output, df)

    return 0
def main(args):
    """
    Concatenate particle images from .star, .mrc, .mrcs and .par inputs into one output stack.

    For a .star input each particle is read from its original stack (optionally under
    ``args.stack_path``) in table order, or in sorted order with ``args.resort``. A .mrc or .mrcs
    input is copied slice by slice. When ``args.star`` is given, a .star file describing the new stack
    is written in Relion 3.1 style, or Relion 2 style with ``args.relion2``.

    ``.mrc`` inputs used to pass the up-front validation and then be rejected as unrecognized; they are
    now read the same way as ``.mrcs``, which matches the validation and its error message.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 on unsupported input or missing --stack-path for .par)
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Reject unsupported inputs before the output stack is created.
    for fn in args.input:
        if not fn.endswith((".star", ".mrcs", ".mrc", ".par")):
            log.error("Only .star, .mrc, .mrcs, and .par files supported")
            return 1

    first_ptcl = 0  # Index in the output stack of the first particle from the current input.
    dfs = []
    with mrc.ZSliceWriter(args.output) as writer:
        for fn in args.input:
            if fn.endswith(".star"):
                df = star.parse_star(fn, augment=True)
                if args.cls is not None:
                    df = star.select_classes(df, args.cls)
                star.set_original_fields(df, inplace=True)
                if args.resort:
                    df = df.sort_values([star.UCSF.IMAGE_ORIGINAL_PATH, star.UCSF.IMAGE_ORIGINAL_INDEX])
                # Particles are copied one row at a time so the output follows the table order.
                for idx, row in df.iterrows():
                    if args.stack_path is not None:
                        input_stack_path = os.path.join(args.stack_path, row[star.UCSF.IMAGE_ORIGINAL_PATH])
                    else:
                        input_stack_path = row[star.UCSF.IMAGE_ORIGINAL_PATH]
                    with mrc.ZSliceReader(input_stack_path) as reader:
                        i = row[star.UCSF.IMAGE_ORIGINAL_INDEX]
                        writer.write(reader.read(i))
            elif fn.endswith(".par"):
                if args.stack_path is None:
                    log.error(".par file input requires --stack-path")
                    return 1
                # NOTE(review): no images are written to the output stack in this branch, although
                # the rows are later given output indices - confirm whether that is intended.
                df = metadata.par2star(metadata.parse_fx_par(fn), data_path=args.stack_path)
                # star.set_original_fields(df, inplace=True)  # Redundant.
                star.augment_star_ucsf(df)
            elif fn.endswith((".mrcs", ".mrc")):
                with mrc.ZSliceReader(fn) as reader:
                    for img in reader:
                        writer.write(img)
                    df = pd.DataFrame({star.UCSF.IMAGE_ORIGINAL_INDEX: np.arange(reader.nz)})
                df[star.UCSF.IMAGE_ORIGINAL_PATH] = fn
            else:
                # Not reachable while the validation above stays in sync with these branches.
                log.error("Unrecognized input file type")
                return 1

            if args.star is not None:
                df[star.UCSF.IMAGE_INDEX] = np.arange(first_ptcl, first_ptcl + df.shape[0])
                if args.abs_path:
                    df[star.UCSF.IMAGE_PATH] = writer.path
                else:
                    # Store the stack path relative to the directory of the output .star file.
                    df[star.UCSF.IMAGE_PATH] = os.path.relpath(writer.path, os.path.dirname(args.star))
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                dfs.append(df)
            first_ptcl += df.shape[0]

    if args.star is not None:
        # Keep only the columns shared by every input table.
        df = pd.concat(dfs, join="inner")
        if not args.relion2:  # Relion 3.1 style output.
            df = star.remove_deprecated_relion2(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=True)
        else:
            df = star.remove_new_relion31(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=False)
    return 0
def main(args):
    """
    Project a volume along the orientation of every particle in a .star file.

    The volume in ``args.map`` is optionally multiplied by ``args.mask``, Fourier transformed, and
    projected once per particle. Each projection (or, with ``args.subtract``, the particle image minus
    its projection) is written to ``args.output``. When ``args.star`` is given, a .star file pointing
    at the new stack is written.

    :param args: Command-line arguments parsed by ArgumentParser.parse_args()
    :return: Exit status (0 on success, 1 if no map was supplied)
    """
    logger = logging.getLogger('root')
    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.getLevelName(args.loglevel.upper()))

    df = star.parse_star(args.input, keep_index=False)
    star.augment_star_ucsf(df)

    if args.map is None:
        print("Please supply a map")
        return 1

    vol = mrc.read(args.map, inc_header=False, compat="relion")
    if args.mask is not None:
        vol *= mrc.read(args.mask, inc_header=False, compat="relion")

    f3d = vop.vol_ft(vol, pfac=args.pfac, threads=args.threads)
    sz = f3d.shape[0] // 2 - 1
    sx, sy = np.meshgrid(np.fft.rfftfreq(sz), np.fft.fftfreq(sz))
    s = np.sqrt(sx**2 + sy**2)
    a = np.arctan2(sy, sx)

    total = df.shape[0]
    # The inverse FFT plan is built from the first projection and reused afterwards.
    inverse = None

    with mrc.ZSliceWriter(args.output) as stack:
        for i, ptcl in df.iterrows():
            f2d = project(f3d, ptcl, s, sx, sy, a, apply_ctf=args.ctf, size=args.size)
            if inverse is None:
                inverse = irfft2(f2d.copy(),
                                 threads=cpu_count(),
                                 planner_effort="FFTW_ESTIMATE",
                                 auto_align_input=True,
                                 auto_contiguous=True)
            out_buf = np.zeros(vol.shape[:-1], dtype=vol.dtype)
            proj = fftshift(inverse(f2d.copy(), out_buf))
            logger.debug("%f +/- %f" % (np.mean(proj), np.std(proj)))
            if args.subtract:
                with mrc.ZSliceReader(ptcl["ucsfImagePath"]) as src:
                    img = src.read(ptcl["ucsfImageIndex"])
                logger.debug("%f +/- %f" % (np.mean(img), np.std(img)))
                proj = img - proj
            stack.write(proj)
            logger.info("%d@%s: %d/%d" % (ptcl["ucsfImageIndex"], ptcl["ucsfImagePath"], i + 1, total))

    if args.star is None:
        return 0

    if args.subtract:
        # Keep a record of the source images before repointing the table at the new stack.
        df[star.UCSF.IMAGE_ORIGINAL_PATH] = df[star.UCSF.IMAGE_PATH]
        df[star.UCSF.IMAGE_ORIGINAL_INDEX] = df[star.UCSF.IMAGE_INDEX]
    df[star.UCSF.IMAGE_PATH] = args.output
    df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])
    star.simplify_star_ucsf(df)
    star.write_star(args.star, df)
    return 0