def vcf_summary(self):
    """Compress, index and merge the VCF files found for a project.

    Samples are looked up below ``<project_root>/<path_id>`` and their VCF
    files are collected with ``get_vcf_files``. Every VCF is bgzip-compressed
    (unless it already ends in ``.gz``) and tabix-indexed; the compressed
    files are then merged with ``vcf-merge`` into
    ``<project_root>/<path_id>/intermediate/results/vcf/all-variants.vcf``,
    which is itself compressed and indexed.

    Returns ``None``. Returns early when the ``project`` argument check
    fails, when no VCF files are found, or when the merged file already
    exists.
    """
    if not self._check_pargs(["project"]):
        return
    flist = find_samples(
        os.path.abspath(
            os.path.join(self.app.controller._meta.project_root,
                         self.app.controller._meta.path_id)),
        **vars(self.pargs))
    vcf_d = get_vcf_files(flist, **vars(self.pargs))

    ## Traverse files, run bgzip and tabix, and merge vcfs to one file
    outdir = os.path.join(
        os.path.abspath(
            os.path.join(self.app.controller._meta.project_root,
                         self.app.controller._meta.path_id,
                         "intermediate", "results", "vcf")))
    if not os.path.exists(outdir):
        self.app.cmd.safe_makedir(outdir)

    # Compressed paths, in dict order; these are what vcf-merge must be given,
    # since bgzip replaces each input file with its .gz counterpart.
    vcf_gz = []
    for v in vcf_d.values():
        gz = v if v.endswith(".gz") else "{}.gz".format(v)
        vcf_gz.append(gz)
        # FIXME: this should be memoized
        # tabix writes its index next to the compressed file, so that is the
        # path that tells us whether the work has already been done.
        if os.path.exists("{}.tbi".format(gz)):
            self.app.log.info("{}.tbi exists; skipping bgzip and tabix operations".format(gz))
            continue
        if gz != v:
            ## bgzip
            self.app.log.info("Running bgzip on {}".format(v))
            self.app.cmd.command(["bgzip", v])
        # tabix (on the compressed file itself, never on "<file>.gz.gz")
        self.app.log.info("Running tabix on {}".format(gz))
        self.app.cmd.command(["tabix", "-f", "-p", "vcf", gz])

    if not vcf_gz:
        # vcf-merge without input files would only produce a bogus output file
        self.app.log.info("No vcf files found; nothing to merge")
        return

    # Make all-variants file
    all_variants = os.path.join(outdir, "all-variants.vcf")
    all_variants_gz = "{}.gz".format(all_variants)
    # bgzip removes the uncompressed file, so a finished earlier run is
    # recognised by the .gz file; the plain file is still honoured as before.
    if os.path.exists(all_variants) or os.path.exists(all_variants_gz):
        return
    self.app.log.info("Merging vcf files {} to {}".format(vcf_gz, all_variants))
    output = self.app.cmd.command(["vcf-merge"] + vcf_gz)
    with open(all_variants, "w") as fh:
        fh.write(output)
    self.app.cmd.command(["bgzip", all_variants])
    self.app.cmd.command(["tabix", "-f", "-p", "vcf", all_variants_gz])
def vcf_summary(self):
    """Compress, index and merge the VCF files found for a project.

    Samples are looked up below ``<project_root>/<path_id>`` and their VCF
    files are collected with ``get_vcf_files``. Every VCF is bgzip-compressed
    (unless it already ends in ``.gz``) and tabix-indexed; the compressed
    files are then merged with ``vcf-merge`` into
    ``<project_root>/<path_id>/intermediate/results/vcf/all-variants.vcf``,
    which is itself compressed and indexed.

    Returns ``None``. Returns early when the ``project`` argument check
    fails, when no VCF files are found, or when the merged file already
    exists.
    """
    if not self._check_pargs(["project"]):
        return
    flist = find_samples(
        os.path.abspath(
            os.path.join(self.app.controller._meta.project_root,
                         self.app.controller._meta.path_id)),
        **vars(self.pargs))
    vcf_d = get_vcf_files(flist, **vars(self.pargs))

    ## Traverse files, run bgzip and tabix, and merge vcfs to one file
    outdir = os.path.join(
        os.path.abspath(
            os.path.join(self.app.controller._meta.project_root,
                         self.app.controller._meta.path_id,
                         "intermediate", "results", "vcf")))
    if not os.path.exists(outdir):
        self.app.cmd.safe_makedir(outdir)

    # Compressed paths handed to vcf-merge, in dict order.
    vcf_out = []
    for v in vcf_d.values():
        # Path of the bgzip-compressed file, whether or not v is compressed yet
        gz = v if v.endswith(".gz") else "{}.gz".format(v)
        vcf_out.append(gz)
        # FIXME: this should be memoized
        # tabix writes its index next to the compressed file, so that is the
        # path that tells us whether the work has already been done.
        if os.path.exists("{}.tbi".format(gz)):
            self.app.log.info(
                "{}.tbi exists; skipping bgzip and tabix operations".format(gz))
            continue
        if gz != v:
            ## bgzip
            self.app.log.info("Running bgzip on {}".format(v))
            self.app.cmd.command(["bgzip", v])
        # tabix (on the compressed file itself, never on "<file>.gz.gz")
        self.app.log.info("Running tabix on {}".format(gz))
        self.app.cmd.command(["tabix", "-f", "-p", "vcf", gz])

    if not vcf_out:
        # vcf-merge without input files would only produce a bogus output file
        self.app.log.info("No vcf files found; nothing to merge")
        return

    # Make all-variants file
    all_variants = os.path.join(outdir, "all-variants.vcf")
    all_variants_gz = "{}.gz".format(all_variants)
    # bgzip removes the uncompressed file, so a finished earlier run is
    # recognised by the .gz file; the plain file is still honoured as before.
    if os.path.exists(all_variants) or os.path.exists(all_variants_gz):
        return
    self.app.log.debug("Merging vcf files {} to {}".format(vcf_out, all_variants))
    self.app.log.info("Merging {} vcf files to {}".format(len(vcf_out), all_variants))
    output = self.app.cmd.command(["vcf-merge"] + vcf_out)
    with open(all_variants, "w") as fh:
        fh.write(output)
    self.app.cmd.command(["bgzip", all_variants])
    self.app.cmd.command(["tabix", "-f", "-p", "vcf", all_variants_gz])
def vcf_summary(self):
    """Compress and index a project's VCF files and link them into the results directory.

    Samples are looked up below ``<project_root>/<path_id>`` and their VCF
    files are collected with ``get_vcf_files``. Every VCF is bgzip-compressed
    (unless it already ends in ``.gz``) and tabix-indexed. The compressed
    file and its ``.tbi`` index are then linked into
    ``<project_root>/<path_id>/intermediate/results/vcf``; in the link name,
    ``TOTAL`` is replaced by ``TOTAL_<key>``, where ``<key>`` is the file's
    key in the dictionary returned by ``get_vcf_files``.

    Returns ``None``; returns early when the ``project`` argument check fails.
    """
    if not self._check_pargs(["project"]):
        return
    flist = find_samples(
        os.path.abspath(
            os.path.join(self.app.controller._meta.project_root,
                         self.app.controller._meta.path_id)),
        **vars(self.pargs))
    vcf_d = get_vcf_files(flist)

    ## Traverse files, run bgzip and tabix, and link results to result directory
    outdir = os.path.join(
        os.path.abspath(
            os.path.join(self.app.controller._meta.project_root,
                         self.app.controller._meta.path_id,
                         "intermediate", "results", "vcf")))
    if not os.path.exists(outdir):
        self.app.cmd.safe_makedir(outdir)

    for k, v in vcf_d.items():
        LOG.debug("Processing vcf file {}".format(v))
        if v.endswith(".gz"):
            gz = v
        else:
            ## bgzip
            gz = "{}.gz".format(v)
            LOG.info("Running bgzip on {}".format(v))
            self.app.cmd.command(["bgzip", v])
        # The link target is derived for both compressed and uncompressed
        # inputs, so it is always bound before the link calls below.
        # presumably k is a sample identifier that makes the name unique — verify
        tgt = os.path.join(
            outdir,
            os.path.basename(gz).replace("TOTAL", "TOTAL_{}".format(k)))
        ## tabix
        LOG.info("Running tabix on {}".format(gz))
        self.app.cmd.command(["tabix", "-f", "-p", "vcf", gz])
        self.app.cmd.link(gz, tgt)
        self.app.cmd.link("{}.tbi".format(gz), "{}.tbi".format(tgt))
def test_summarize_variants(self):
    """Test summarizing variants.

    Looks up samples with ``find_samples`` and collects their VCF files
    with ``get_vcf_files``.
    """
    # j_doe_00_01 is defined outside this block — presumably the path of a
    # test project fixture; verify against the module's setup code.
    flist = find_samples(j_doe_00_01)
    # NOTE(review): no assertion on vcf_d is visible here, so as shown this
    # only checks that the two calls complete without raising.
    vcf_d = get_vcf_files(flist)