Beispiel #1
0
def main():
    """Convert a delimited sample sheet into a YAML config file.

    Parses the command line, validates the delimiter and the input/output
    paths, then reads one read unit per non-empty CSV row. Each row must
    contain the sample name followed by every ``ReadUnit`` field; empty
    (or whitespace-only) read unit fields become ``None``. A missing
    ``rg_id`` is generated with ``create_rg_id_from_ru``.

    The output file receives a ``samples`` mapping (sample name -> list of
    read unit keys) followed by a ``readunits`` mapping (read unit key ->
    read unit fields).

    Exits with status 1 on an invalid delimiter, a missing input file, an
    existing output file (unless ``--force-overwrite`` is given) or a row
    with the wrong number of fields.
    """

    parser = argparse.ArgumentParser(description=__doc__)

    # generic args
    csv_cols = ['sample'] + list(ReadUnit._fields)
    numbered_cols = ", ".join(
        "{}:{}".format(idx, col) for idx, col in enumerate(csv_cols, 1))
    parser.add_argument(
        '-i',
        "--csv",
        required=True,
        help="CSV input file describing your samples using the"
        " following columns: {} (sample and fq1 are mandatory; leave unknown fields empty)"
        .format(numbered_cols))
    parser.add_argument('-o',
                        "--yaml",
                        required=True,
                        help="Output config (yaml) file")
    parser.add_argument('-d',
                        '--delimiter',
                        default="\t",
                        help="Use this delimiter for CSV (default is <tab>)")
    parser.add_argument('-f',
                        '--force-overwrite',
                        action='store_true',
                        help="Force overwriting of existing file")
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Increase verbosity")
    parser.add_argument('-q',
                        '--quiet',
                        action='count',
                        default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    #
    # Clamp at DEBUG: a third -v would otherwise yield NOTSET (0), which
    # makes a non-root logger defer to its parent's level, and further
    # -v's would produce negative levels.
    logger.setLevel(
        max(logging.DEBUG,
            logging.WARNING + 10 * args.quiet - 10 * args.verbose))

    if len(args.delimiter) != 1:
        logger.critical("Delimiter needs to be exactly one character")
        sys.exit(1)
    if not os.path.exists(args.csv):
        logger.critical("Input file %s does not exist", args.csv)
        sys.exit(1)
    if os.path.exists(args.yaml) and not args.force_overwrite:
        logger.critical("Cowardly refusing to overwrite existing file %s",
                        args.yaml)
        sys.exit(1)

    samples = dict()
    readunits = dict()

    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires (matters for quoted fields with line breaks).
    with open(args.csv, newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=args.delimiter)
        for row in csvreader:
            if not row:
                continue
            # only build the (potentially long) debug string when needed
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "DEBUG row %s",
                    "\t".join("{}:{}".format(k, v)
                              for k, v in zip(csv_cols, row)))
            if len(row) != len(csv_cols):
                # rows can be too long as well as too short
                logger.critical("Found %s fields (require %s) in row: %s",
                                len(row), len(csv_cols), '\t'.join(row))
                sys.exit(1)
            sample_name = row[0]
            # empty or whitespace-only fields mean "unknown"
            ru_fields = [x if x.strip() else None for x in row[1:]]
            ru = ReadUnit._make(ru_fields)
            if not ru.rg_id:
                ru = ru._replace(rg_id=create_rg_id_from_ru(ru))
            ru_key = key_for_readunit(ru)

            readunits[ru_key] = dict(ru._asdict())
            samples.setdefault(sample_name, []).append(ru_key)

    with open(args.yaml, 'w') as fh:
        yaml.dump(dict(samples=samples), fh, default_flow_style=False)
        yaml.dump(dict(readunits=readunits), fh, default_flow_style=False)
Beispiel #2
0
def main():
    """Build a YAML config (samples and read units) from a sample sheet.

    Steps: parse arguments, set the log level from -v/-q, sanity-check the
    delimiter and file paths, read the CSV row by row and finally dump the
    collected data to the output file.

    Every non-empty row must hold the sample name plus all ``ReadUnit``
    fields. Blank read unit fields are stored as ``None`` and a missing
    ``rg_id`` is filled in via ``create_rg_id_from_ru``. The output
    contains a ``samples`` mapping (sample name -> read unit keys) and a
    ``readunits`` mapping (read unit key -> field dict).

    Calls ``sys.exit(1)`` if the delimiter is not a single character, the
    input file is missing, the output file exists and --force-overwrite
    was not given, or a row has an unexpected number of fields.
    """

    parser = argparse.ArgumentParser(description=__doc__)

    # generic args
    csv_cols = ['sample'] + list(ReadUnit._fields)
    parser.add_argument('-i', "--csv", required=True,
                        help="CSV input file describing your samples using the"
                        " following columns: {} (sample and fq1 are mandatory; leave unknown fields empty)".format(
                            ", ".join("{}:{}".format(i, c) for i, c in enumerate(csv_cols, 1))))
    parser.add_argument('-o', "--yaml", required=True,
                        help="Output config (yaml) file")
    parser.add_argument('-d', '--delimiter', default="\t",
                        help="Use this delimiter for CSV (default is <tab>)")
    parser.add_argument('-f', '--force-overwrite', action='store_true',
                        help="Force overwriting of existing file")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    #
    # Never go below DEBUG: -vvv would otherwise result in NOTSET (0),
    # which makes a non-root logger fall back to its parent's level, and
    # more -v's would give negative levels.
    level = logging.WARNING + 10*args.quiet - 10*args.verbose
    logger.setLevel(max(logging.DEBUG, level))

    if len(args.delimiter) != 1:
        logger.critical("Delimiter needs to be exactly one character")
        sys.exit(1)
    if not os.path.exists(args.csv):
        logger.critical("Input file %s does not exist", args.csv)
        sys.exit(1)
    if os.path.exists(args.yaml) and not args.force_overwrite:
        logger.critical("Cowardly refusing to overwrite existing file %s", args.yaml)
        sys.exit(1)

    samples = dict()
    readunits = dict()

    # The csv module documentation asks for files to be opened with
    # newline='' so that the reader handles line endings itself.
    with open(args.csv, newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=args.delimiter)
        for row in csvreader:
            if not row:
                continue
            # avoid formatting the row unless debug output is enabled
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("DEBUG row %s", "\t".join("{}:{}".format(k, v) for k, v in zip(csv_cols, row)))
            if len(row) != len(csv_cols):
                # the count can be too high as well as too low
                logger.critical("Found %s fields (require %s) in row: %s", len(row), len(csv_cols), '\t'.join(row))
                sys.exit(1)
            sample_name = row[0]
            # blank fields are treated as unknown values
            ru_fields = [x if x.strip() else None for x in row[1:]]
            ru = ReadUnit._make(ru_fields)
            if not ru.rg_id:
                ru = ru._replace(rg_id=create_rg_id_from_ru(ru))
            ru_key = key_for_readunit(ru)

            readunits[ru_key] = dict(ru._asdict())
            samples.setdefault(sample_name, []).append(ru_key)

    with open(args.yaml, 'w') as fh:
        yaml.dump(dict(samples=samples), fh, default_flow_style=False)
        yaml.dump(dict(readunits=readunits), fh, default_flow_style=False)
def _collect_mux_readunits(mux_folder, units, run_num):
    """Gather samples and read units for the ``Sample*`` dirs of one MUX.

    Args:
        mux_folder: directory holding the per-sample output directories.
        units: config entry of this MUX; ``lane_ids`` and ``flowcell_id``
            are read from it.
        run_num: run number stored in every created read unit.

    Returns:
        Tuple ``(samples, readunits)``: ``samples`` maps a sample id to its
        list of read unit keys (possibly empty), ``readunits`` maps a read
        unit key to the read unit's fields as a dict.
    """
    samples = {}
    readunits = {}
    for child in os.listdir(mux_folder):
        if not child.startswith('Sample'):
            continue
        sample_id = child.split('_')[-1]
        samples[sample_id] = []
        sample_path = os.path.join(mux_folder, child)
        for lane_id in units["lane_ids"]:
            status, fq1, fq2 = check_fastq(sample_path, lane_id)
            if not status:
                # FIXME throw error?
                continue

            ru = ReadUnit(run_num, units["flowcell_id"], sample_id,
                          lane_id, None, fq1, fq2)
            ru = ru._replace(rg_id=create_rg_id_from_ru(ru))
            k = key_for_readunit(ru)
            readunits[k] = dict(ru._asdict())
            samples[sample_id].append(k)
    return samples, readunits


def _write_mux_config(muxinfo_cfg, samples, readunits, overwrite):
    """Write the samples and readunits mappings as YAML to ``muxinfo_cfg``.

    Exits with status 1 if the file exists and ``overwrite`` is false.
    """
    if os.path.exists(muxinfo_cfg) and not overwrite:
        logger.critical("Refusing to overwrite existing file %s", muxinfo_cfg)
        sys.exit(1)
    with open(muxinfo_cfg, 'w') as fh:
        yaml.dump(dict(samples=samples), fh, default_flow_style=False)
        yaml.dump(dict(readunits=readunits), fh, default_flow_style=False)
    logger.info("Created %s", muxinfo_cfg)


def main():
    """Create one YAML config (samples and readunits) per MUX.

    Reads ``conf.yaml`` from the given bcl2fastq directory, and for every
    MUX listed under ``units`` whose folder exists below ``out/`` collects
    the read units of its ``Sample*`` directories and writes them to
    ``<outpref><mux_id>.yaml``. With --dry-run nothing is written.

    Exits with status 1 if the bcl2fastq directory or its ``conf.yaml`` is
    missing, if the config lacks ``units`` or ``run_num``, or if an output
    file exists and --overwrite was not given.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-b", "--bcl2fastq", required=True,
                        help="bcl2fastq directory")
    parser.add_argument("-o", "--outpref",
                        help="Output prefix used for created yaml files per MUX (default: bcl2fastq dir)")
    parser.add_argument('-f', "--overwrite", action='store_true',
                        help="Overwrite existing files")
    parser.add_argument('-n', "--dry-run", action='store_true',
                        help="Dry run")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")

    args = parser.parse_args()
    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    #
    # Clamp at DEBUG: -vvv would otherwise give NOTSET (0), which makes a
    # non-root logger defer to its parent's level, and more -v's would
    # result in negative levels.
    logger.setLevel(max(logging.DEBUG,
                        logging.WARNING + 10*args.quiet - 10*args.verbose))

    if not os.path.exists(args.bcl2fastq):
        logger.critical("out_dir %s does not exist", args.bcl2fastq)
        sys.exit(1)

    confinfo = os.path.join(args.bcl2fastq, 'conf.yaml')
    if not os.path.exists(confinfo):
        logger.critical("conf info '%s' does not exist under Run directory.\n", confinfo)
        sys.exit(1)

    # NOTE: the prefix is concatenated as-is with the MUX id, i.e. no path
    # separator is inserted when defaulting to the bcl2fastq directory.
    outprefix = args.outpref if args.outpref else args.bcl2fastq

    with open(confinfo) as fh_cfg:
        yaml_data = yaml.safe_load(fh_cfg)

    # Explicit validation instead of assert: asserts are stripped under -O
    # and an empty config file is loaded as None.
    if not isinstance(yaml_data, dict):
        yaml_data = {}
    for required_key in ("units", "run_num"):
        if required_key not in yaml_data:
            logger.critical("Required key '%s' missing in %s", required_key, confinfo)
            sys.exit(1)
    run_num = yaml_data["run_num"]

    for mux, units in yaml_data["units"].items():
        mux_id = mux.split("_")[-1]
        mux_folder = os.path.join(args.bcl2fastq, "out", mux)
        if not os.path.exists(mux_folder):
            continue

        # samples and readunits per mux
        samples, readunits = _collect_mux_readunits(mux_folder, units, run_num)

        # write yaml per mux
        muxinfo_cfg = outprefix + mux_id + ".yaml"
        if args.dry_run:
            logger.warning("Skipped creation of %s", muxinfo_cfg)
        else:
            _write_mux_config(muxinfo_cfg, samples, readunits, args.overwrite)