def locus(args):
    """
    %prog locus bamfile

    Extract selected locus from a list of TREDs for validation, and run lobSTR.
    """
    from jcvi.formats.sam import get_minibam

    # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation
    INCLUDE = ["HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2", "FXTAS"]
    db_choices = ("hg38", "hg19")

    p = OptionParser(locus.__doc__)
    p.add_option("--tred", choices=INCLUDE, help="TRED name")
    p.add_option("--ref", choices=db_choices, default="hg38", help="Reference genome")
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bamfile,) = args
    ref = opts.ref
    lhome = opts.lobstr_home
    tred = opts.tred

    # Look up the repeat coordinates for the requested TRED in the metadata table
    tredsfile = datafile("TREDs.meta.csv")
    tf = pd.read_csv(tredsfile, index_col=0)
    # .loc replaces DataFrame.ix, which was deprecated in pandas 0.20 and
    # removed in pandas 1.0
    row = tf.loc[tred]
    tag = "repeat_location"
    ldb = "TREDs"
    # The metadata carries hg19-specific coordinate/database columns
    if ref == "hg19":
        tag += "." + ref
        ldb += "-" + ref
    seqid, start_end = row[tag].split(":")

    # Pad the locus on both sides so reads spanning the repeat boundary are kept
    PAD = 1000
    start, end = start_end.split("-")
    start, end = int(start) - PAD, int(end) + PAD
    region = "{}:{}-{}".format(seqid, start, end)

    # Subset the BAM to the padded region, then genotype with lobSTR
    minibamfile = get_minibam(bamfile, region)
    c = seqid.replace("chr", "")
    cmd, vcf = allelotype_on_chr(minibamfile, c, lhome, ldb)
    sh(cmd)

    parser = LobSTRvcf(columnidsfile=None)
    parser.parse(vcf, filtered=False)
    items = parser.items()
    if not items:
        print("No entry found!", file=sys.stderr)
        return

    # Reuse the already-fetched items rather than calling parser.items() again
    k, v = items[0]
    print("{} => {}".format(tred, v.replace(",", "/")), file=sys.stderr)
def locus(args):
    """
    %prog locus bamfile

    Extract selected locus from a list of TREDs for validation, and run lobSTR.
    """
    from jcvi.formats.sam import get_minibam

    # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation
    INCLUDE = ["HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2", "FXTAS"]
    db_choices = ("hg38", "hg19")

    p = OptionParser(locus.__doc__)
    p.add_option("--tred", choices=INCLUDE, help="TRED name")
    p.add_option("--ref", choices=db_choices, default="hg38", help="Reference genome")
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bamfile,) = args
    ref = opts.ref
    lhome = opts.lobstr_home
    tred = opts.tred

    # Resolve the repeat coordinates of the requested TRED from the metadata table
    tredsfile = datafile("TREDs.meta.csv")
    tf = pd.read_csv(tredsfile, index_col=0)
    # .loc replaces DataFrame.ix, which was deprecated in pandas 0.20 and
    # removed in pandas 1.0
    row = tf.loc[tred]
    tag = "repeat_location"
    ldb = "TREDs"
    # hg19 coordinates/database live in suffixed columns of the metadata
    if ref == "hg19":
        tag += "." + ref
        ldb += "-" + ref
    seqid, start_end = row[tag].split(":")

    # Pad both flanks so reads spanning the repeat boundary are retained
    PAD = 1000
    start, end = start_end.split("-")
    start, end = int(start) - PAD, int(end) + PAD
    region = "{}:{}-{}".format(seqid, start, end)

    # Subset the BAM to the padded region, then genotype with lobSTR
    minibamfile = get_minibam(bamfile, region)
    c = seqid.replace("chr", "")
    cmd, vcf = allelotype_on_chr(minibamfile, c, lhome, ldb)
    sh(cmd)

    parser = LobSTRvcf(columnidsfile=None)
    parser.parse(vcf, filtered=False)
    items = parser.items()
    if not items:
        print("No entry found!", file=sys.stderr)
        return

    # Reuse the already-fetched items rather than calling parser.items() again
    k, v = items[0]
    print("{} => {}".format(tred, v.replace(",", "/")), file=sys.stderr)
def locus(args):
    """
    %prog locus bamfile

    Extract selected locus from a list of TREDs for validation, and run lobSTR.
    """
    from jcvi.formats.sam import get_minibam

    # See `Format-lobSTR-database.ipynb` for a list of TREDs for validation
    INCLUDE = ["HD", "SBMA", "SCA1", "SCA2", "SCA8", "SCA17", "DM1", "DM2"]
    p = OptionParser(locus.__doc__)
    p.add_option("--tred", choices=INCLUDE, help="TRED name")
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (bamfile,) = args
    lhome = opts.lobstr_home
    tred = opts.tred

    # Resolve the repeat coordinates of the requested TRED from the metadata table
    tredsfile = op.join(datadir, "TREDs.meta.csv")
    tf = pd.read_csv(tredsfile, index_col=0)
    # .loc replaces DataFrame.ix, which was deprecated in pandas 0.20 and
    # removed in pandas 1.0
    row = tf.loc[tred]
    seqid, start_end = row["repeat_location"].split(":")

    # Pad both flanks so reads spanning the repeat boundary are retained
    PAD = 1000
    start, end = start_end.split("-")
    start, end = int(start) - PAD, int(end) + PAD
    region = "{}:{}-{}".format(seqid, start, end)

    # Subset the BAM to the padded region, then genotype with lobSTR
    minibamfile = get_minibam(bamfile, region)
    c = seqid.replace("chr", "")
    cmd, vcf = allelotype_on_chr(minibamfile, c, lhome, "TREDs")
    sh(cmd)

    parser = LobSTRvcf(columnidsfile=None)
    parser.parse(vcf, filtered=False)
    items = parser.items()
    # Guard against an empty call set; the old code indexed [0] unconditionally
    # and would raise IndexError when lobSTR produced no entries
    if not items:
        print("No entry found!", file=sys.stderr)
        return

    k, v = items[0]
    # print() function replaces the Python 2 `print >> sys.stderr` statement,
    # for consistency with the sibling implementations in this file
    print("{} => {}".format(tred, v.replace(",", "/")), file=sys.stderr)
def htt(args):
    """
    %prog htt bamfile chr4:3070000-3080000

    Extract HTT region and run lobSTR.
    """
    from jcvi.formats.sam import get_minibam

    p = OptionParser(htt.__doc__)
    p.set_home("lobstr")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bamfile, region = args
    lhome = opts.lobstr_home

    # Subset the BAM down to the requested window before genotyping
    minibamfile = get_minibam(bamfile, region)
    # lobSTR expects the bare chromosome name, without any "chr" prefix
    seqid = region.split(":", 1)[0]
    chrom = seqid.replace("chr", "")
    cmd, vcf = allelotype_on_chr(minibamfile, chrom, lhome, "hg38")
    sh(cmd)
def run_mito(
    chrMfa, bamfile, opts, realignonly=False, svonly=False, store=None, cleanup=False
):
    """
    Process the mitochondrial reads of one BAM: extract chrM, realign with
    speedseq, compute depth, call SVs, and run bamtools piledriver.

    Intermediate outputs are skipped when already up to date; results are
    optionally pushed to ``store`` (S3) and intermediates removed on ``cleanup``.
    """
    from jcvi.formats.sam import get_minibam

    region = "chrM"
    minibam = op.basename(bamfile).replace(".bam", ".{}.bam".format(region))
    # Extract the chrM-only BAM unless a previous run already produced it
    if op.exists(minibam):
        logging.debug("{} found. Skipped.".format(minibam))
    else:
        get_minibam(bamfile, region)

    speedseq_bin = op.join(opts.speedseq_home, "speedseq")
    realign = minibam.rsplit(".", 1)[0] + ".realign"
    realignbam = realign + ".bam"
    # Shared speedseq arguments: verbose, thread count, output prefix
    margs = " -v -t {} -o {}".format(opts.cpus, realign)

    if need_update(minibam, realign + ".bam"):
        sh(speedseq_bin + " realign" + margs + " {} {}".format(chrMfa, minibam))
    else:
        logging.debug("{} found. Skipped.".format(realignbam))
    if realignonly:
        return

    # Per-base depth over the realigned BAM
    depthfile = realign + ".depth"
    if need_update(realignbam, depthfile):
        coverage(
            [
                chrMfa,
                realignbam,
                "--nosort",
                "--format=coverage",
                "--outfile={}".format(depthfile),
            ]
        )
    if store:
        push_to_s3(store, depthfile)

    # Structural-variant calling via `speedseq sv`
    vcffile = realign + ".sv.vcf.gz"
    if need_update(realignbam, vcffile):
        sv_cmd = speedseq_bin + " sv" + margs
        sv_cmd += " -R {}".format(chrMfa)
        sv_cmd += " -m {}".format(opts.support)
        sv_cmd += " -B {} -D {} -S {}".format(
            realignbam, realign + ".discordants.bam", realign + ".splitters.bam"
        )
        sh(sv_cmd)
    else:
        logging.debug("{} found. Skipped.".format(vcffile))
    if store:
        push_to_s3(store, vcffile)

    if svonly:
        if cleanup:
            do_cleanup(minibam, realignbam)
        return

    # Per-base allele counts via bamtools piledriver
    piledriver = realign + ".piledriver"
    if need_update(realignbam, piledriver):
        pd_cmd = "bamtools piledriver -fasta {}".format(chrMfa)
        pd_cmd += " -in {}".format(realignbam)
        sh(pd_cmd, outfile=piledriver)
    if store:
        push_to_s3(store, piledriver)
    if cleanup:
        do_cleanup(minibam, realignbam)
def run_mito(
    chrMfa, bamfile, opts, realignonly=False, svonly=False, store=None, cleanup=False
):
    """
    Run the mitochondrial pipeline on a single BAM.

    Stages: chrM extraction, speedseq realignment, depth profiling, SV calling,
    and bamtools piledriver. Each stage is skipped when its output is newer
    than its input; outputs may be pushed to ``store`` and intermediate BAMs
    removed when ``cleanup`` is set.
    """
    from jcvi.formats.sam import get_minibam

    region = "chrM"
    minibam = op.basename(bamfile).replace(".bam", ".{}.bam".format(region))
    # Stage 1: pull out the chrM reads, unless already done
    if not op.exists(minibam):
        get_minibam(bamfile, region)
    else:
        logging.debug("{} found. Skipped.".format(minibam))

    speedseq_bin = op.join(opts.speedseq_home, "speedseq")
    realign = minibam.rsplit(".", 1)[0] + ".realign"
    realignbam = realign + ".bam"
    # Common speedseq flags: verbose, threads, output prefix
    margs = " -v -t {} -o {}".format(opts.cpus, realign)

    # Stage 2: realign the mini-BAM against the chrM reference
    if need_update(minibam, realign + ".bam"):
        realign_cmd = speedseq_bin + " realign"
        realign_cmd += margs
        realign_cmd += " {} {}".format(chrMfa, minibam)
        sh(realign_cmd)
    else:
        logging.debug("{} found. Skipped.".format(realignbam))
    if realignonly:
        return

    # Stage 3: depth profile of the realigned BAM
    depthfile = realign + ".depth"
    if need_update(realignbam, depthfile):
        coverage_args = [chrMfa, realignbam, "--nosort", "--format=coverage"]
        coverage_args.append("--outfile={}".format(depthfile))
        coverage(coverage_args)
    if store:
        push_to_s3(store, depthfile)

    # Stage 4: structural variants with `speedseq sv`
    vcffile = realign + ".sv.vcf.gz"
    if need_update(realignbam, vcffile):
        sv_cmd = speedseq_bin + " sv"
        sv_cmd += margs
        sv_cmd += " -R {}".format(chrMfa)
        sv_cmd += " -m {}".format(opts.support)
        sv_cmd += " -B {} -D {} -S {}".format(
            realignbam, realign + ".discordants.bam", realign + ".splitters.bam"
        )
        sh(sv_cmd)
    else:
        logging.debug("{} found. Skipped.".format(vcffile))
    if store:
        push_to_s3(store, vcffile)

    if svonly:
        if cleanup:
            do_cleanup(minibam, realignbam)
        return

    # Stage 5: per-base allele counts with bamtools piledriver
    piledriver = realign + ".piledriver"
    if need_update(realignbam, piledriver):
        driver_cmd = "bamtools piledriver -fasta {}".format(chrMfa)
        driver_cmd += " -in {}".format(realignbam)
        sh(driver_cmd, outfile=piledriver)
    if store:
        push_to_s3(store, piledriver)
    if cleanup:
        do_cleanup(minibam, realignbam)