def main():
    """Convert a delimited sample sheet (CSV) into a YAML pipeline config.

    Reads a delimiter-separated file whose columns are 'sample' followed by
    the ReadUnit fields, builds per-sample read-unit listings, and writes a
    YAML file with 'samples' and 'readunits' sections.  Exits with status 1
    on a bad delimiter, a missing input file, or an existing output file
    (unless --force-overwrite is given).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # generic args
    csv_cols = ['sample'] + list(ReadUnit._fields)
    parser.add_argument(
        '-i', "--csv", required=True,
        help="CSV input file describing your samples using the"
        " following columns: {} (sample and fq1 are mandatory; leave unknown fields empty)".format(
            ", ".join("{}:{}".format(i + 1, c) for i, c in enumerate(csv_cols))))
    parser.add_argument('-o', "--yaml", required=True,
                        help="Output config (yaml) file")
    parser.add_argument('-d', '--delimiter', default="\t",
                        help="Use this delimiter for CSV (default is <tab>)")
    parser.add_argument('-f', '--force-overwrite', action='store_true',
                        help="Force overwriting of existing file")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q shift the logging level by 10 per occurrence:
    # -vv -> DEBUG, -v -> INFO, (default) -> WARNING,
    # -q -> ERROR, -qq -> CRITICAL, -qqq -> no logging at all
    logger.setLevel(logging.WARNING + 10 * args.quiet - 10 * args.verbose)

    if len(args.delimiter) != 1:
        logger.critical("Delimiter needs to be exactly one character")
        sys.exit(1)
    if not os.path.exists(args.csv):
        logger.critical("Input file %s does not exist", args.csv)
        sys.exit(1)
    if os.path.exists(args.yaml) and not args.force_overwrite:
        logger.critical("Cowardly refusing to overwrite existing file %s", args.yaml)
        sys.exit(1)

    samples = dict()
    readunits = dict()
    # newline='' is the documented way to open a file handed to csv.reader
    with open(args.csv, newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=args.delimiter)
        for row in csvreader:
            if not row:  # skip blank lines
                continue
            logger.debug("DEBUG row %s", "\t".join(
                "{}:{}".format(k, v) for k, v in zip(csv_cols, row)))
            if len(row) != len(csv_cols):
                logger.critical("Only found %s fields (require %s) in row: %s",
                                len(row), len(csv_cols), '\t'.join(row))
                sys.exit(1)
            sample_name = row[0]
            # empty/whitespace-only fields become None so ReadUnit sees "missing"
            ru_fields = [x if x.strip() else None for x in row[1:]]
            ru = ReadUnit._make(ru_fields)
            if not ru.rg_id:
                # derive a read-group id when none was provided
                ru = ru._replace(rg_id=create_rg_id_from_ru(ru))
            ru_key = key_for_readunit(ru)
            readunits[ru_key] = dict(ru._asdict())
            samples.setdefault(sample_name, []).append(ru_key)

    with open(args.yaml, 'w') as fh:
        yaml.dump(dict(samples=samples), fh, default_flow_style=False)
        yaml.dump(dict(readunits=readunits), fh, default_flow_style=False)
def main():
    """main function

    Turn a delimited sample sheet into a YAML configuration containing
    'samples' and 'readunits' sections.
    """
    column_names = ['sample'] + list(ReadUnit._fields)
    col_desc = ", ".join(
        "{}:{}".format(idx + 1, name) for idx, name in enumerate(column_names))
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-i', "--csv", required=True,
        help="CSV input file describing your samples using the"
        " following columns: {} (sample and fq1 are mandatory; leave unknown fields empty)".format(col_desc))
    parser.add_argument('-o', "--yaml", required=True,
                        help="Output config (yaml) file")
    parser.add_argument('-d', '--delimiter', default="\t",
                        help="Use this delimiter for CSV (default is <tab>)")
    parser.add_argument('-f', '--force-overwrite', action='store_true',
                        help="Force overwriting of existing file")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # -v/-q are repeatable and move the log level in steps of 10:
    # -vv DEBUG, -v INFO, (none) WARNING, -q ERROR, -qq CRITICAL, -qqq silent
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if len(args.delimiter) != 1:
        logger.fatal("Delimiter needs to be exactly one character")
        sys.exit(1)
    if not os.path.exists(args.csv):
        logger.fatal("Input file %s does not exist", args.csv)
        sys.exit(1)
    if os.path.exists(args.yaml) and not args.force_overwrite:
        logger.fatal("Cowardly refusing to overwrite existing file %s", args.yaml)
        sys.exit(1)

    samples = {}
    readunits = {}
    with open(args.csv) as infile:
        for record in csv.reader(infile, delimiter=args.delimiter):
            if not record:
                continue
            logger.debug("DEBUG row %s", "\t".join(
                "{}:{}".format(k, v) for k, v in zip(column_names, record)))
            if len(record) != len(column_names):
                logger.fatal("Only found %s fields (require %s) in row: %s",
                             len(record), len(column_names), '\t'.join(record))
                sys.exit(1)
            sample_name, raw_fields = record[0], record[1:]
            # blank fields are normalized to None before building the ReadUnit
            cleaned = [f if len(f.strip()) else None for f in raw_fields]
            unit = ReadUnit._make(cleaned)
            if not unit.rg_id:
                unit = unit._replace(rg_id=create_rg_id_from_ru(unit))
            unit_key = key_for_readunit(unit)
            readunits[unit_key] = dict(unit._asdict())
            samples.setdefault(sample_name, []).append(unit_key)

    with open(args.yaml, 'w') as outfile:
        yaml.dump(dict(samples=samples), outfile, default_flow_style=False)
        yaml.dump(dict(readunits=readunits), outfile, default_flow_style=False)
def main():
    """Create one YAML config file per MUX from a bcl2fastq directory.

    Reads conf.yaml from the given bcl2fastq directory, scans each MUX's
    demultiplexed 'Sample*' folders for FastQ files, and writes a per-MUX
    YAML file with 'samples' and 'readunits' sections.  Exits with status 1
    on a missing bcl2fastq directory or conf.yaml, or when an output file
    already exists and --overwrite was not given.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-b", "--bcl2fastq", required=True,
                        help="bcl2fastq directory")
    parser.add_argument("-o", "--outpref",
                        help="Output prefix used for created yaml files per MUX (default: bcl2fastq dir)")
    parser.add_argument('-f', "--overwrite", action='store_true',
                        help="Overwrite existing files")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    args = parser.parse_args()

    # Repeatable -v and -q shift the logging level by 10 per occurrence:
    # -vv DEBUG, -v INFO, (default) WARNING, -q ERROR, -qq CRITICAL,
    # -qqq no logging at all
    logger.setLevel(logging.WARNING + 10 * args.quiet - 10 * args.verbose)

    if not os.path.exists(args.bcl2fastq):
        logger.critical("out_dir %s does not exist", args.bcl2fastq)
        sys.exit(1)
    # was: os.path.join(args.bcl2fastq + '/conf.yaml') — the string
    # concatenation defeated os.path.join; pass the components instead
    confinfo = os.path.join(args.bcl2fastq, 'conf.yaml')
    if not os.path.exists(confinfo):
        logger.critical("conf info '%s' does not exist under Run directory.\n", confinfo)
        sys.exit(1)

    outprefix = args.outpref if args.outpref else args.bcl2fastq

    with open(confinfo) as fh_cfg:
        yaml_data = yaml.safe_load(fh_cfg)
    assert "units" in yaml_data
    assert "run_num" in yaml_data
    run_num = yaml_data["run_num"]

    def _mux_samples_and_readunits(mux, units):
        """Collect (samples, readunits) dicts for one demultiplexed MUX folder."""
        samples = {}
        readunits = {}
        for child in os.listdir(os.path.join(args.bcl2fastq, "out", mux)):
            # only demultiplexed per-sample folders are of interest
            if not child.startswith('Sample'):
                continue
            sample_id = child.split('_')[-1]
            samples[sample_id] = []
            sample_path = os.path.join(args.bcl2fastq, "out", mux, child)
            for lane_id in units["lane_ids"]:
                status, fq1, fq2 = check_fastq(sample_path, lane_id)
                if not status:
                    # FIXME throw error?
                    continue
                ru = ReadUnit(run_num, units["flowcell_id"], sample_id,
                              lane_id, None, fq1, fq2)
                ru = ru._replace(rg_id=create_rg_id_from_ru(ru))
                k = key_for_readunit(ru)
                readunits[k] = dict(ru._asdict())
                samples[sample_id].append(k)
        return samples, readunits

    def _write_mux_cfg(muxinfo_cfg, samples, readunits):
        """Write one MUX's samples/readunits as YAML (honours --overwrite)."""
        if os.path.exists(muxinfo_cfg) and not args.overwrite:
            logger.critical("Refusing to overwrite existing file %s", muxinfo_cfg)
            sys.exit(1)
        with open(muxinfo_cfg, 'w') as fh:
            fh.write(yaml.dump(dict(samples=samples), default_flow_style=False))
            fh.write(yaml.dump(dict(readunits=readunits), default_flow_style=False))
        logger.info("Created %s", muxinfo_cfg)

    for mux, units in yaml_data["units"].items():
        mux_id = mux.split("_")[-1]
        mux_folder = os.path.join(args.bcl2fastq, "out", mux)
        if not os.path.exists(mux_folder):
            continue
        samples, readunits = _mux_samples_and_readunits(mux, units)
        # write yaml per mux
        muxinfo_cfg = outprefix + mux_id + ".yaml"
        if args.dry_run:
            logger.warning("Skipped creation of %s", muxinfo_cfg)
        else:
            _write_mux_cfg(muxinfo_cfg, samples, readunits)