Ejemplo n.º 1
0
 def vcf_summary(self):
     """Compress, index and merge the project's VCF files.

     Looks up the VCF files for the samples found under the project
     directory (``project_root``/``path_id``), makes sure each one is
     bgzip-compressed and tabix-indexed, and merges them with
     ``vcf-merge`` into ``intermediate/results/vcf/all-variants.vcf``
     below the project directory. The merged file is then compressed
     and indexed as well.

     Returns ``None``. Nothing is done when
     ``self._check_pargs(["project"])`` is falsy.
     """
     if not self._check_pargs(["project"]):
         return
     project_dir = os.path.abspath(
         os.path.join(self.app.controller._meta.project_root,
                      self.app.controller._meta.path_id))
     flist = find_samples(project_dir, **vars(self.pargs))
     vcf_d = get_vcf_files(flist, **vars(self.pargs))
     outdir = os.path.join(project_dir, "intermediate", "results", "vcf")
     if not os.path.exists(outdir):
         self.app.cmd.safe_makedir(outdir)

     # Compressed path of every input. vcf-merge needs bgzipped, indexed
     # files, and bgzip replaces ``x.vcf`` with ``x.vcf.gz``, so the
     # original uncompressed paths are no longer valid after this loop.
     vcf_gz = []
     for v in vcf_d.values():
         gz = v if v.endswith(".gz") else "{}.gz".format(v)
         vcf_gz.append(gz)
         # FIXME: this should be memoized
         # The index always sits next to the *compressed* file.
         if os.path.exists("{}.tbi".format(gz)):
             self.app.log.info("{}.tbi exists; skipping bgzip and tabix operations".format(gz))
             continue
         if gz != v:
             self.app.log.info("Running bgzip on {}".format(v))
             self.app.cmd.command(["bgzip", v])
         # Index the compressed file itself; appending another ".gz" to an
         # already compressed input would point tabix at a missing file.
         self.app.log.info("Running tabix on {}".format(gz))
         self.app.cmd.command(["tabix", "-f", "-p", "vcf", gz])

     # Make all-variants file
     all_variants = os.path.join(outdir, "all-variants.vcf")
     all_variants_gz = "{}.gz".format(all_variants)
     # After a successful run only the .gz remains, so test for it too;
     # otherwise a re-run would merge again and bgzip onto an existing file.
     if not os.path.exists(all_variants_gz):
         if not os.path.exists(all_variants):
             self.app.log.info("Merging vcf files {} to {}".format(vcf_gz, all_variants))
             # NOTE(review): assumes command() returns the merged VCF text
             # written by vcf-merge to stdout - confirm.
             output = self.app.cmd.command(['vcf-merge'] + vcf_gz)
             with open(all_variants, "w") as fh:
                 fh.write(output)
         self.app.cmd.command(['bgzip', all_variants])
     self.app.cmd.command(['tabix', "-f", "-p", "vcf", all_variants_gz])
Ejemplo n.º 2
0
 def vcf_summary(self):
     """Compress, index and merge the project's VCF files.

     Looks up the VCF files for the samples found under the project
     directory (``project_root``/``path_id``), makes sure each one is
     bgzip-compressed and tabix-indexed, and merges the compressed files
     with ``vcf-merge`` into ``intermediate/results/vcf/all-variants.vcf``
     below the project directory. A freshly merged file is then
     compressed and indexed as well.

     Returns ``None``. Nothing is done when
     ``self._check_pargs(["project"])`` is falsy, and the merge step is
     skipped when a merged file (plain or compressed) already exists.
     """
     if not self._check_pargs(["project"]):
         return
     project_dir = os.path.abspath(
         os.path.join(self.app.controller._meta.project_root,
                      self.app.controller._meta.path_id))
     flist = find_samples(project_dir, **vars(self.pargs))
     vcf_d = get_vcf_files(flist, **vars(self.pargs))
     outdir = os.path.join(project_dir, "intermediate", "results", "vcf")
     if not os.path.exists(outdir):
         self.app.cmd.safe_makedir(outdir)

     # Compressed path of every input; these are what vcf-merge receives.
     vcf_out = []
     for v in vcf_d.values():
         gz = v if v.endswith(".gz") else "{}.gz".format(v)
         vcf_out.append(gz)
         # FIXME: this should be memoized
         # The index always sits next to the *compressed* file.
         if os.path.exists("{}.tbi".format(gz)):
             self.app.log.info(
                 "{}.tbi exists; skipping bgzip and tabix operations".
                 format(gz))
             continue
         if gz != v:
             self.app.log.info("Running bgzip on {}".format(v))
             self.app.cmd.command(["bgzip", v])
         # Index the compressed file itself; appending another ".gz" to an
         # already compressed input would point tabix at a missing file.
         self.app.log.info("Running tabix on {}".format(gz))
         self.app.cmd.command(["tabix", "-f", "-p", "vcf", gz])

     # Make all-variants file
     all_variants = os.path.join(outdir, "all-variants.vcf")
     all_variants_gz = "{}.gz".format(all_variants)
     # bgzip leaves only the .gz behind, so a finished earlier run is
     # recognised by the compressed file rather than the plain one.
     if os.path.exists(all_variants) or os.path.exists(all_variants_gz):
         return
     self.app.log.debug("Merging vcf files {} to {}".format(
         vcf_out, all_variants))
     self.app.log.info("Merging {} vcf files to {}".format(
         len(vcf_out), all_variants))
     # NOTE(review): assumes command() returns the merged VCF text written
     # by vcf-merge to stdout - confirm.
     output = self.app.cmd.command(['vcf-merge'] + vcf_out)
     with open(all_variants, "w") as fh:
         fh.write(output)
     self.app.cmd.command(['bgzip', all_variants])
     self.app.cmd.command(['tabix', "-f", "-p", "vcf", all_variants_gz])
Ejemplo n.º 3
0
 def vcf_summary(self):
     """Compress and index the project's VCF files and link them into the results directory.

     Looks up the VCF files for the samples found under the project
     directory (``project_root``/``path_id``). Each file is bgzip-compressed
     unless its name already ends in ``.gz``, then tabix-indexed. The
     compressed file and its ``.tbi`` index are linked into
     ``intermediate/results/vcf`` below the project directory, with
     ``TOTAL`` in the file name replaced by ``TOTAL_<key>``, where ``<key>``
     is the file's key in the mapping returned by ``get_vcf_files``.

     Returns ``None``. Nothing is done when
     ``self._check_pargs(["project"])`` is falsy.
     """
     if not self._check_pargs(["project"]):
         return
     project_dir = os.path.abspath(
         os.path.join(self.app.controller._meta.project_root, self.app.controller._meta.path_id)
     )
     flist = find_samples(project_dir, **vars(self.pargs))
     vcf_d = get_vcf_files(flist)
     outdir = os.path.join(project_dir, "intermediate", "results", "vcf")
     if not os.path.exists(outdir):
         self.app.cmd.safe_makedir(outdir)

     gz_ext = ".gz"
     for k, v in vcf_d.items():
         # NOTE(review): unconditional print kept from the original; it looks
         # like leftover debugging output - consider moving it to LOG.debug.
         print(v)
         if v.endswith(gz_ext):
             # Strip only the trailing extension; str.replace would also
             # remove ".gz" from directory names earlier in the path.
             stem = v[:-len(gz_ext)]
         else:
             LOG.info("Running bgzip on {}".format(v))
             self.app.cmd.command(["bgzip", v])
             stem = v
         src_gz = "{}.gz".format(stem)
         # Compute the link target for both branches; previously it was only
         # set for inputs that were already compressed.
         tgt_gz = os.path.join(
             outdir,
             os.path.basename(src_gz).replace("TOTAL", "TOTAL_{}".format(k)),
         )
         LOG.info("Running tabix on {}".format(src_gz))
         self.app.cmd.command(["tabix", "-f", "-p", "vcf", src_gz])
         self.app.cmd.link(src_gz, tgt_gz)
         self.app.cmd.link("{}.tbi".format(src_gz), "{}.tbi".format(tgt_gz))
Ejemplo n.º 4
0
 def test_summarize_variants(self):
     """Run sample discovery and VCF lookup for ``j_doe_00_01``.

     Nothing is asserted: the test passes as long as neither
     ``find_samples`` nor ``get_vcf_files`` raises.
     """
     samples = find_samples(j_doe_00_01)
     get_vcf_files(samples)
Ejemplo n.º 5
0
 def test_summarize_variants(self):
     """Test summarizing variants.

     Passes ``j_doe_00_01`` to ``find_samples`` and hands the result to
     ``get_vcf_files``. The part of the test visible here stores that
     result in ``vcf_d`` but makes no assertions about it.
     """
     flist = find_samples(j_doe_00_01)
     vcf_d = get_vcf_files(flist)