def main(args):
    """Combine particle images from several inputs into one MRC stack.

    Every path in ``args.input`` is handled in order and its images are
    written through a single ``mrc.ZSliceWriter`` opened on ``args.output``.
    When ``args.star`` is given, the per-input metadata tables are
    concatenated (keeping only the columns shared by all of them) and
    written to that path with image indices pointing into the new stack.

    Args:
        args: Parsed command-line namespace. Attributes read here are
            ``loglevel``, ``input``, ``output``, ``stack_path`` and ``star``.

    Returns:
        0 on success; 1 if an input has an unsupported extension or a
        ``.par`` input is given without ``args.stack_path``.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Validate every input before the output writer is created, so that a
    # bad argument does not leave a partially written stack behind.
    supported = (".star", ".mrcs", ".mrc", ".par")
    if not all(fn.endswith(supported) for fn in args.input):
        log.error("Only .star, .mrc, .mrcs, and .par files supported")
        return 1

    first_ptcl = 0  # Index in the output stack of the next image to be written.
    dfs = []
    with mrc.ZSliceWriter(args.output) as writer:
        for fn in args.input:
            if fn.endswith(".star"):
                df = star.parse_star(fn, keep_index=False)
                star.augment_star_ucsf(df)
                star.set_original_fields(df, inplace=True)
                # Sort by source stack and slice so each source file is
                # opened once and the output order matches the table order.
                df = df.sort_values([
                    star.UCSF.IMAGE_ORIGINAL_PATH,
                    star.UCSF.IMAGE_ORIGINAL_INDEX,
                ])
                for name, g in df.groupby(star.UCSF.IMAGE_ORIGINAL_PATH):
                    with mrc.ZSliceReader(name) as reader:
                        for i in g[star.UCSF.IMAGE_ORIGINAL_INDEX].values:
                            writer.write(reader.read(i))
            elif fn.endswith(".par"):
                if args.stack_path is None:
                    log.error(".par file input requires --stack-path")
                    return 1
                # NOTE(review): this branch only builds metadata and writes
                # no images to the output stack - confirm that is intended.
                df = metadata.par2star(metadata.parse_fx_par(fn),
                                       data_path=args.stack_path)
                star.augment_star_ucsf(df)
            elif fn.endswith((".csv", ".cs")):
                # Not implemented. Unreachable in practice because the
                # validation above rejects these extensions first.
                return 1
            elif fn.endswith((".mrcs", ".mrc")):
                # The validation above (and its message) accept both
                # extensions, so both are copied slice by slice here.
                with mrc.ZSliceReader(fn) as reader:
                    for img in reader:
                        writer.write(img)
                    df = pd.DataFrame({
                        star.UCSF.IMAGE_ORIGINAL_INDEX: np.arange(reader.nz)
                    })
                df[star.UCSF.IMAGE_ORIGINAL_PATH] = fn
            else:
                print("Unrecognized input file type")
                return 1

            if args.star is not None:
                # Point each record at its new position in the output stack.
                df[star.UCSF.IMAGE_INDEX] = np.arange(
                    first_ptcl, first_ptcl + df.shape[0])
                df[star.UCSF.IMAGE_PATH] = writer.path
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                dfs.append(df)
            first_ptcl += df.shape[0]

    if args.star is not None:
        # join="inner" keeps only the columns present in every input table.
        df = pd.concat(dfs, join="inner")
        star.write_star(args.star, df, reindex=True)
    return 0
def main(args):
    """Load .star files, apply the requested edits, and write the result.

    All paths in ``args.input`` are parsed and concatenated, keeping only the
    columns common to every input. Each optional attribute of ``args`` then
    switches on one processing step; the steps run in the order in which
    they appear in this function. Records removed by the subsampling,
    micrograph-range and minimum-separation steps are collected in an
    auxiliary table, which is written to ``args.auxout`` when that is set.

    Args:
        args: Parsed command-line namespace. ``args.subsample``,
            ``args.subsample_micrographs``, ``args.merge_fields``,
            ``args.merge_key`` and ``args.by_original`` are normalized in
            place, and ``args.input`` is extended in info mode.

    Returns:
        0 on success, including the early exits taken for ``args.info`` and
        ``args.split_micrographs``; 1 when the options are inconsistent or
        no optics group matches ``args.drop_optics_group``.
    """
    if args.info:
        # In info mode the output argument is treated as one more input.
        args.input.append(args.output)

    # join="inner" keeps only the columns present in every input file.
    df = pd.concat(
        (star.parse_star(inp, augment=args.augment) for inp in args.input),
        join="inner")

    dfaux = None  # Records excluded by a filtering step, if any.

    if args.cls is not None:
        df = star.select_classes(df, args.cls)

    if args.info:
        if star.is_particle_star(df) and star.Relion.CLASS in df.columns:
            c = df[star.Relion.CLASS].value_counts()
            print("%s particles in %d classes" %
                  ("{:,}".format(df.shape[0]), len(c)))
            print(" ".join([
                '%d: %s (%.2f %%)' % (i, "{:,}".format(s), 100. * s / c.sum())
                for i, s in iteritems(c.sort_index())
            ]))
        elif star.is_particle_star(df):
            print("%s particles" % "{:,}".format(df.shape[0]))
        if star.Relion.MICROGRAPH_NAME in df.columns:
            mgraphcnt = df[star.Relion.MICROGRAPH_NAME].value_counts()
            print("%s micrographs, %s +/- %s particles per micrograph" %
                  ("{:,}".format(len(mgraphcnt)),
                   "{:,.3f}".format(np.mean(mgraphcnt)),
                   "{:,.3f}".format(np.std(mgraphcnt))))
        try:
            print("%f A/px (%sX magnification)" %
                  (star.calculate_apix(df),
                   "{:,.0f}".format(df[star.Relion.MAGNIFICATION][0])))
        except KeyError:
            # The pixel-size line is skipped when the lookup fails.
            pass
        origin_cols = df.columns.intersection(star.Relion.ORIGINS3D)
        if len(origin_cols) > 0:
            print("Largest shift is %f pixels" %
                  np.max(np.abs(df[origin_cols].values)))
        return 0

    if args.drop_angles:
        df.drop(star.Relion.ANGLES, axis=1, inplace=True, errors="ignore")

    if args.drop_containing is not None:
        containing_fields = [
            f for q in args.drop_containing for f in df.columns if q in f
        ]
        if args.invert:
            # Keep the matching columns and drop everything else instead.
            containing_fields = df.columns.difference(containing_fields)
        df.drop(containing_fields, axis=1, inplace=True, errors="ignore")

    if args.offset_group is not None:
        df[star.Relion.GROUPNUMBER] += args.offset_group

    if args.restack is not None:
        if not args.augment:
            star.augment_star_ucsf(df, inplace=True)
        star.set_original_fields(df, inplace=True)
        df[star.UCSF.IMAGE_PATH] = args.restack
        df[star.UCSF.IMAGE_INDEX] = np.arange(df.shape[0])

    if args.subsample_micrographs is not None:
        if args.bootstrap is not None:
            print("Only particle sampling allows bootstrapping")
            return 1
        mgraphs = df[star.Relion.MICROGRAPH_NAME].unique()
        # Values below 1 are a fraction of the micrographs (at least one is
        # kept); anything else is an absolute count. The builtin int is used
        # because np.int no longer exists in current NumPy releases.
        if args.subsample_micrographs < 1:
            args.subsample_micrographs = int(
                max(np.round(args.subsample_micrographs * len(mgraphs)), 1))
        else:
            args.subsample_micrographs = int(args.subsample_micrographs)
        ind = np.random.choice(len(mgraphs),
                               size=args.subsample_micrographs,
                               replace=False)
        mask = df[star.Relion.MICROGRAPH_NAME].isin(mgraphs[ind])
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.subsample is not None and args.suffix == "":
        # Same fraction-or-count convention as for micrographs above.
        if args.subsample < 1:
            args.subsample = int(
                max(np.round(args.subsample * df.shape[0]), 1))
        else:
            args.subsample = int(args.subsample)
        ind = np.random.choice(df.shape[0],
                               size=args.subsample,
                               replace=False)
        # `ind` holds row positions, so build a positional mask. Matching
        # against index labels picks the wrong rows whenever the index is
        # not a unique 0..n-1 range (several inputs, earlier filtering).
        mask = np.zeros(df.shape[0], dtype=bool)
        mask[ind] = True
        if args.auxout is not None:
            dfaux = df.loc[~mask]
        df = df.loc[mask]

    if args.copy_angles is not None:
        angle_star = star.parse_star(args.copy_angles, augment=args.augment)
        df = star.smart_merge(df, angle_star,
                              fields=star.Relion.ANGLES,
                              key=args.merge_key)

    if args.copy_alignments is not None:
        align_star = star.parse_star(args.copy_alignments,
                                     augment=args.augment)
        df = star.smart_merge(df, align_star,
                              fields=star.Relion.ALIGNMENTS,
                              key=args.merge_key)

    if args.copy_reconstruct_images is not None:
        recon_star = star.parse_star(args.copy_reconstruct_images,
                                     augment=args.augment)
        df[star.Relion.RECONSTRUCT_IMAGE_NAME] = recon_star[
            star.Relion.IMAGE_NAME]

    if args.transform is not None:
        # Three comma-separated numbers are Euler angles in degrees; any
        # other value is parsed as a JSON array.
        if args.transform.count(",") == 2:
            r = geom.euler2rot(
                *np.deg2rad([np.double(s) for s in args.transform.split(",")]))
        else:
            r = np.array(json.loads(args.transform))
        df = star.transform_star(df, r, inplace=True)

    if args.invert_hand:
        df = star.invert_hand(df, inplace=True)

    if args.copy_paths is not None:
        path_star = star.parse_star(args.copy_paths)
        star.set_original_fields(df, inplace=True)
        df[star.Relion.IMAGE_NAME] = path_star[star.Relion.IMAGE_NAME]

    if args.copy_ctf is not None:
        ctf_star = pd.concat(
            (star.parse_star(inp, augment=args.augment)
             for inp in glob.glob(args.copy_ctf)),
            join="inner")
        df = star.smart_merge(df, ctf_star, star.Relion.CTF_PARAMS,
                              key=args.merge_key)

    if args.copy_micrograph_coordinates is not None:
        coord_star = pd.concat(
            (star.parse_star(inp, augment=args.augment)
             for inp in glob.glob(args.copy_micrograph_coordinates)),
            join="inner")
        df = star.smart_merge(df, coord_star,
                              fields=star.Relion.MICROGRAPH_COORDS,
                              key=args.merge_key)

    if args.scale is not None:
        star.scale_coordinates(df, args.scale, inplace=True)
        star.scale_origins(df, args.scale, inplace=True)
        star.scale_magnification(df, args.scale, inplace=True)

    if args.scale_particles is not None:
        star.scale_origins(df, args.scale_particles, inplace=True)
        star.scale_magnification(df, args.scale_particles, inplace=True)

    if args.scale_coordinates is not None:
        star.scale_coordinates(df, args.scale_coordinates, inplace=True)

    if args.scale_origins is not None:
        star.scale_origins(df, args.scale_origins, inplace=True)

    if args.scale_magnification is not None:
        star.scale_magnification(df, args.scale_magnification, inplace=True)

    if args.scale_apix is not None:
        star.scale_apix(df, args.scale_apix, inplace=True)

    if args.recenter:
        df = star.recenter(df, inplace=True)

    if args.zero_origins:
        df = star.zero_origins(df, inplace=True)

    if args.pick:
        df.drop(df.columns.difference(star.Relion.PICK_PARAMS),
                axis=1, inplace=True, errors="ignore")

    if args.subsample is not None and args.suffix != "":
        if args.subsample < 1:
            print("Specific integer sample size")
            return 1
        sample_size = int(args.subsample)
        # Floor division: the number of samplings is used as an array
        # dimension and therefore has to be an integer.
        if args.bootstrap is not None:
            nsamplings = args.bootstrap
        else:
            nsamplings = df.shape[0] // sample_size
        inds = np.random.choice(df.shape[0],
                                size=(nsamplings, sample_size),
                                replace=args.bootstrap is not None)
        for i, ind in enumerate(inds):
            star.write_star(
                os.path.join(
                    args.output,
                    os.path.basename(args.input[0])[:-5] + args.suffix +
                    "_%d" % (i + 1)),
                df.iloc[ind])

    if args.to_micrographs:
        df = star.to_micrographs(df)

    if args.micrograph_range:
        df.set_index(star.Relion.MICROGRAPH_NAME, inplace=True)
        m, n = [int(tok) for tok in args.micrograph_range.split(",")]
        mg = df.index.unique().sort_values()
        # Micrographs outside the half-open range [m, n) go to the
        # auxiliary table.
        outside = list(range(0, m)) + list(range(n, len(mg)))
        dfaux = df.loc[mg[outside]].reset_index()
        df = df.loc[mg[m:n]].reset_index()

    if args.micrograph_path is not None:
        df = star.replace_micrograph_path(df, args.micrograph_path,
                                          inplace=True)

    if args.min_separation is not None:
        gb = df.groupby(star.Relion.MICROGRAPH_NAME)
        dupes = []
        for n, g in gb:
            nb = algo.query_connected(
                g[star.Relion.COORDS].values - g[star.Relion.ORIGINS],
                args.min_separation / star.calculate_apix(df))
            dupes.extend(g.index[~np.isnan(nb)])
        dfaux = df.loc[dupes]
        df.drop(dupes, inplace=True)

    if args.merge_source is not None:
        if args.merge_fields is None:
            print("Merge fields must be specified using --merge-fields")
            return 1
        if "," in args.merge_fields:
            args.merge_fields = args.merge_fields.split(",")
        else:
            args.merge_fields = [args.merge_fields]
        if args.merge_key is not None:
            if "," in args.merge_key:
                args.merge_key = args.merge_key.split(",")
        if args.by_original:
            args.by_original = star.original_field(args.merge_key)
        else:
            args.by_original = args.merge_key
        merge_star = star.parse_star(args.merge_source, augment=args.augment)
        df = star.smart_merge(df, merge_star,
                              fields=args.merge_fields,
                              key=args.merge_key,
                              left_key=args.by_original)

    if args.revert_original:
        df = star.revert_original(df, inplace=True)

    if args.set_optics is not None:
        tok = args.set_optics.split(",")
        df = star.set_optics_groups(df, sep=tok[0], idx=int(tok[1]),
                                    inplace=True)
        df.dropna(axis=0, how="any", inplace=True)

    if args.drop_optics_group is not None:
        # Match on the optics group number first, then fall back to its name.
        idx = df[star.Relion.OPTICSGROUP].isin(args.drop_optics_group)
        if not np.any(idx):
            idx = df[star.Relion.OPTICSGROUPNAME].isin(args.drop_optics_group)
        if not np.any(idx):
            print("No group found to drop")
            return 1
        df = df.loc[~idx]

    if args.split_micrographs:
        dfs = star.split_micrographs(df)
        for mg in dfs:
            star.write_star(
                os.path.join(args.output, os.path.basename(mg)[:-4]) +
                args.suffix, dfs[mg])
        return 0

    if args.auxout is not None and dfaux is not None:
        # Write the excluded records to the auxiliary path. `df` is left
        # untouched so the main output below still gets the kept records.
        if not args.relion2:
            dfaux = star.remove_deprecated_relion2(dfaux, inplace=True)
            star.write_star(args.auxout, dfaux,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=True)
        else:
            dfaux = star.remove_new_relion31(dfaux, inplace=True)
            star.write_star(args.auxout, dfaux,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=False)

    if args.output is not None:
        if not args.relion2:  # Relion 3.1 style output.
            df = star.remove_deprecated_relion2(df, inplace=True)
            star.write_star(args.output, df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=True)
        else:
            df = star.remove_new_relion31(df, inplace=True)
            star.write_star(args.output, df,
                            resort_records=args.sort,
                            simplify=args.augment_output,
                            optics=False)
    return 0
def main(args):
    """Combine particle images from several inputs into one MRC stack.

    Every path in ``args.input`` is handled in order and its images are
    written through a single ``mrc.ZSliceWriter`` opened on ``args.output``.
    When ``args.star`` is given, the per-input metadata tables are
    concatenated (keeping only the columns shared by all of them) and
    written to that path with image indices pointing into the new stack.

    Args:
        args: Parsed command-line namespace. Attributes read here are
            ``loglevel``, ``input``, ``output``, ``cls``, ``resort``,
            ``stack_path``, ``star``, ``abs_path`` and ``relion2``.

    Returns:
        0 on success; 1 if an input has an unsupported extension or a
        ``.par`` input is given without ``args.stack_path``.
    """
    log = logging.getLogger('root')
    hdlr = logging.StreamHandler(sys.stdout)
    log.addHandler(hdlr)
    log.setLevel(logging.getLevelName(args.loglevel.upper()))

    # Validate every input before the output writer is created, so that a
    # bad argument does not leave a partially written stack behind.
    supported = (".star", ".mrcs", ".mrc", ".par")
    if not all(fn.endswith(supported) for fn in args.input):
        log.error("Only .star, .mrc, .mrcs, and .par files supported")
        return 1

    first_ptcl = 0  # Index in the output stack of the next image to be written.
    dfs = []
    with mrc.ZSliceWriter(args.output) as writer:
        for fn in args.input:
            if fn.endswith(".star"):
                df = star.parse_star(fn, augment=True)
                if args.cls is not None:
                    df = star.select_classes(df, args.cls)
                star.set_original_fields(df, inplace=True)
                if args.resort:
                    df = df.sort_values([
                        star.UCSF.IMAGE_ORIGINAL_PATH,
                        star.UCSF.IMAGE_ORIGINAL_INDEX,
                    ])
                # Images are copied in table order, one record at a time.
                for _, row in df.iterrows():
                    if args.stack_path is not None:
                        input_stack_path = os.path.join(
                            args.stack_path,
                            row[star.UCSF.IMAGE_ORIGINAL_PATH])
                    else:
                        input_stack_path = row[star.UCSF.IMAGE_ORIGINAL_PATH]
                    with mrc.ZSliceReader(input_stack_path) as reader:
                        i = row[star.UCSF.IMAGE_ORIGINAL_INDEX]
                        writer.write(reader.read(i))
            elif fn.endswith(".par"):
                if args.stack_path is None:
                    log.error(".par file input requires --stack-path")
                    return 1
                # NOTE(review): this branch only builds metadata and writes
                # no images to the output stack - confirm that is intended.
                df = metadata.par2star(metadata.parse_fx_par(fn),
                                       data_path=args.stack_path)
                star.augment_star_ucsf(df)
            elif fn.endswith((".csv", ".cs")):
                # Not implemented. Unreachable in practice because the
                # validation above rejects these extensions first.
                return 1
            elif fn.endswith((".mrcs", ".mrc")):
                # The validation above (and its message) accept both
                # extensions, so both are copied slice by slice here.
                with mrc.ZSliceReader(fn) as reader:
                    for img in reader:
                        writer.write(img)
                    df = pd.DataFrame(
                        {star.UCSF.IMAGE_ORIGINAL_INDEX: np.arange(reader.nz)})
                df[star.UCSF.IMAGE_ORIGINAL_PATH] = fn
            else:
                print("Unrecognized input file type")
                return 1

            if args.star is not None:
                # Point each record at its new position in the output stack.
                df[star.UCSF.IMAGE_INDEX] = np.arange(
                    first_ptcl, first_ptcl + df.shape[0])
                if args.abs_path:
                    df[star.UCSF.IMAGE_PATH] = writer.path
                else:
                    # Store the stack path relative to the .star file's folder.
                    df[star.UCSF.IMAGE_PATH] = os.path.relpath(
                        writer.path, os.path.dirname(args.star))
                df["index"] = df[star.UCSF.IMAGE_INDEX]
                star.simplify_star_ucsf(df)
                dfs.append(df)
            first_ptcl += df.shape[0]

    if args.star is not None:
        # join="inner" keeps only the columns present in every input table.
        df = pd.concat(dfs, join="inner")
        if not args.relion2:  # Relion 3.1 style output.
            df = star.remove_deprecated_relion2(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=True)
        else:
            df = star.remove_new_relion31(df, inplace=True)
            star.write_star(args.star, df, resort_records=False, optics=False)
    return 0