예제 #1
0
파일: delly.py 프로젝트: zhaotao1987/jcvi
def mito(args):
    """
    %prog mito chrM.fa input.bam

    Identify mitochondrial deletions.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    p = OptionParser(mito.__doc__)
    p.set_aws_opts(store="hli-mv-data-science/htang/mito-deletions")
    p.add_option("--realignonly", default=False, action="store_true",
                 help="Realign only")
    p.add_option("--svonly", default=False, action="store_true",
                 help="Run Realign => SV calls only")
    p.add_option("--support", default=1, type="int",
                 help="Minimum number of supporting reads")
    p.set_home("speedseq", default="/mnt/software/speedseq/bin")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    chrMfa, bamfile = args
    store = opts.output_path
    cleanup = not opts.nocleanup

    if not op.exists(chrMfa):
        logging.debug("File `{}` missing. Exiting.".format(chrMfa))
        return

    # The guard checks for the FASTA index (`.fai`). That file is written by
    # `samtools faidx`; `samtools index` is meant for BAM/CRAM input.
    chrMfai = chrMfa + ".fai"
    if not op.exists(chrMfai):
        sh("samtools faidx {}".format(chrMfa))

    if bamfile.endswith(".bam"):
        bamfiles = [bamfile]
    else:
        # Any other argument is read as a list file, one BAM path per line.
        # Blank lines are dropped so they do not turn into empty paths.
        with open(bamfile) as fp:
            bamfiles = [x.strip() for x in fp if x.strip()]

    if store:
        # Sample names (basename up to the first ".") that already have a
        # `.depth` entry in the listing returned by ls_s3(store).
        computed = set(op.basename(x).split(".")[0]
                       for x in ls_s3(store) if x.endswith(".depth"))
        remaining_samples = [x for x in bamfiles
                             if op.basename(x).split(".")[0] not in computed]

        logging.debug("Already computed on `{}`: {}".format(
            store, len(bamfiles) - len(remaining_samples)))
        bamfiles = remaining_samples

    logging.debug("Total samples: {}".format(len(bamfiles)))

    for bamfile in bamfiles:
        run_mito(chrMfa, bamfile, opts,
                 realignonly=opts.realignonly,
                 svonly=opts.svonly,
                 store=store, cleanup=cleanup)
예제 #2
0
파일: str.py 프로젝트: wroldwiedbwe/jcvi
def batchlobstr(args):
    """
    %prog batchlobstr samples.csv

    Run lobSTR sequentially on list of samples. Each line contains:
    sample-name,s3-location
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    p = OptionParser(batchlobstr.__doc__)
    p.add_option("--sep", default=",", help="Separator for building commandline")
    p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/")
    p.set_aws_opts(store="hli-mv-data-science/htang/str-data")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (samplesfile,) = args
    store = opts.output_path
    # A set keeps the per-sample "already computed" lookup O(1).
    computed = set(ls_s3(store))
    # Loop-invariant prefix of every emitted command line.
    base_cmd = "python -m jcvi.variation.str lobstr".split()
    skipped = total = 0

    # `with` guarantees the samples file is closed even when a malformed row
    # raises while being unpacked.
    with open(samplesfile) as fp:
        for row in fp:
            total += 1
            # Input rows are always comma-separated; --sep only affects output.
            sample, s3file = row.strip().split(",")[:2]
            # The sample name must contain exactly one "_" or this unpacking
            # raises ValueError.
            exec_id, sample_id = sample.split("_")
            # Derive the BAM path by dropping ".gz" and swapping ".vcf" for
            # ".bam" in the listed location.
            bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam")

            gzfile = sample + ".{0}.vcf.gz".format("hg38")
            if gzfile in computed:
                skipped += 1
                continue

            print(
                opts.sep.join(
                    base_cmd
                    + [
                        "hg38",
                        "--input_bam_path",
                        bamfile,
                        "--output_path",
                        store,
                        "--sample_id",
                        sample_id,
                        "--workflow_execution_id",
                        exec_id,
                        "--lobstr_home",
                        opts.lobstr_home,
                        "--workdir",
                        opts.workdir,
                    ]
                )
            )
    logging.debug("Total skipped: {0}".format(percentage(skipped, total)))
예제 #3
0
파일: str.py 프로젝트: pombredanne/jcvi
def batchlobstr(args):
    """
    %prog batchlobstr samples.csv

    Run lobSTR sequentially on list of samples. Each line contains:
    sample-name,s3-location
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    p = OptionParser(batchlobstr.__doc__)
    p.add_option("--sep", default=",", help="Separator for building commandline")
    p.set_home("lobstr", default="s3://hli-mv-data-science/htang/str-build/lobSTR/")
    p.set_aws_opts(store="hli-mv-data-science/htang/str-data")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    samplesfile, = args
    store = opts.output_path
    # A set keeps the per-sample "already computed" lookup O(1).
    computed = set(ls_s3(store))
    # Loop-invariant prefix of every emitted command line.
    base_cmd = "python -m jcvi.variation.str lobstr".split()
    skipped = total = 0

    # `with` guarantees the samples file is closed even when a malformed row
    # raises while being unpacked.
    with open(samplesfile) as fp:
        for row in fp:
            total += 1
            # Input rows are always comma-separated; --sep only affects output.
            sample, s3file = row.strip().split(",")[:2]
            # The sample name must contain exactly one "_" or this unpacking
            # raises ValueError.
            exec_id, sample_id = sample.split("_")
            # Derive the BAM path by dropping ".gz" and swapping ".vcf" for
            # ".bam" in the listed location.
            bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam")

            gzfile = sample + ".{0}.vcf.gz".format("hg38")
            if gzfile in computed:
                skipped += 1
                continue

            # Single-argument print(...) produces the same output under the
            # Python 2 print statement and the Python 3 print function.
            print(opts.sep.join(
                base_cmd
                + [
                    "hg38",
                    "--input_bam_path",
                    bamfile,
                    "--output_path",
                    store,
                    "--sample_id",
                    sample_id,
                    "--workflow_execution_id",
                    exec_id,
                    "--lobstr_home",
                    opts.lobstr_home,
                    "--workdir",
                    opts.workdir,
                ]
            ))
    logging.debug("Total skipped: {0}".format(percentage(skipped, total)))
예제 #4
0
def batchlobstr(args):
    """
    %prog batchlobstr samples.csv

    Run lobSTR sequentially on list of samples. Each line contains:
    sample-name,s3-location
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    p = OptionParser(batchlobstr.__doc__)
    p.add_option("--sep",
                 default=",",
                 help="Separator for building commandline")
    p.set_aws_opts(store="hli-mv-data-science/htang/str")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    samplesfile, = args
    store = opts.store
    # A set keeps the per-sample "already computed" lookup O(1).
    computed = set(ls_s3(store))
    # Loop-invariant prefix of every emitted command line.
    base_cmd = "python -m jcvi.variation.str lobstr".split()
    skipped = total = 0

    # `with` guarantees the samples file is closed even when a malformed row
    # raises while being unpacked.
    with open(samplesfile) as fp:
        for row in fp:
            total += 1
            # Input rows are always comma-separated; --sep only affects output.
            sample, s3file = row.strip().split(",")[:2]
            # Derive the BAM path by dropping ".gz" and swapping ".vcf" for
            # ".bam" in the listed location.
            bamfile = s3file.replace(".gz", "").replace(".vcf", ".bam")

            gzfile = sample + ".{0}.vcf.gz".format("hg38")
            if gzfile in computed:
                skipped += 1
                continue

            # Single-argument print(...) produces the same output under the
            # Python 2 print statement and the Python 3 print function.
            print(opts.sep.join(
                base_cmd
                + [bamfile, "hg38", "--prefix", sample,
                   "--workdir", opts.workdir, "--cleanup",
                   "--store", opts.store]
            ))
    logging.debug("Total skipped: {0}".format(percentage(skipped, total)))