コード例 #1
0
ファイル: test_intrahost.py プロジェクト: pvanheus/viral-ngs
    def test_snpeff(self):
        """Run align -> merge_to_vcf -> snpEff -> iSNV_table and check the results.

        Each stage is invoked through its ``parser_*`` function and the
        ``func_main`` attached to the parsed arguments. The MSA and the final
        tabular output are compared against expected files from the test input
        directory; the two intermediate VCFs are only read back, not compared.
        """
        temp_dir = tempfile.gettempdir()
        input_dir = util.file.get_test_input_path(self)

        ref_fasta = os.path.join(input_dir, "ref-rabies-JQ685920.fasta")
        assembly_fasta = os.path.join(input_dir, "RBV16.fasta")
        isnv_calls = os.path.join(input_dir, "vphaser2.RBV16.mapped.txt.gz")

        # align sample to reference to create MSA
        msa_fasta = util.file.mkstempfname('.fasta')
        expected_msa_fasta = os.path.join(input_dir, "msa.fasta")
        args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"]
        args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        test.assert_equal_contents(self, msa_fasta, expected_msa_fasta)

        # merge (one) VCF to merged vcf
        merged_vcf = os.path.join(temp_dir, "merged.vcf.gz")
        expected_merged_vcf = os.path.join(input_dir, "merged.vcf.gz")
        args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta,
                "--strip_chr_version", "--parse_accession"]
        args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        # Readers are used as context managers so the underlying files are closed.
        with util.vcf.VcfReader(merged_vcf) as vcf:
            rows = list(vcf.get())
        with util.vcf.VcfReader(expected_merged_vcf) as expected_vcf:
            expected_rows = list(expected_vcf.get())
        # TODO(review): the exact comparison self.assertEqual(rows, expected_rows)
        # was disabled in the original test and is left disabled here.

        # run snpEff against merged VCF to predict SNP effects
        eff_vcf = os.path.join(temp_dir, "ann_eff.vcf.gz")
        expected_eff_vcf = os.path.join(input_dir, "ann_eff.vcf.gz")
        # NOTE(review): the last argument looks like a redacted e-mail address
        # option; confirm the intended value against the upstream test.
        args = [merged_vcf, "JQ685920", eff_vcf, "[email protected]"]
        # presumably self.capsys is the pytest capture fixture; capture is
        # switched off while snpEff runs -- verify against the test class setup
        with self.capsys.disabled():
            args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args)
            args.func_main(args)
            with util.vcf.VcfReader(eff_vcf) as vcf:
                rows = list(vcf.get())
            with util.vcf.VcfReader(expected_eff_vcf) as expected_vcf:
                expected_rows = list(expected_vcf.get())
        # TODO(review): self.assertEqual(rows, expected_rows) is disabled here too.

        # create tabular iSNV output
        eff_txt = os.path.join(temp_dir, "ann_eff.txt.gz")
        expected_eff_txt = os.path.join(input_dir, "ann_eff.txt.gz")
        args = [eff_vcf, eff_txt]
        args = intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),
                                       util.file.read_tabfile(expected_eff_txt)):
            for colout, colexpected in zip(outrow, expectedrow):
                # if both cells cast to float, perform approx comparison
                try:
                    actual_value = float(colout)
                    expected_value = float(colexpected)
                except ValueError:
                    # Non-numeric cells are compared as comma-separated lists,
                    # ignoring the order of the items.
                    self.assertEqual(sorted(colout.split(",")),
                                     sorted(colexpected.split(",")))
                else:
                    # Compare actual against expected; the original compared
                    # the actual value with itself, which could never fail.
                    self.assertAlmostEqual(actual_value, expected_value)
コード例 #2
0
    def test_snpeff(self):
        """Run align -> merge_to_vcf -> snpEff -> iSNV_table and check the results.

        Each stage is invoked through its ``parser_*`` function and the
        ``func_main`` attached to the parsed arguments. The MSA and the final
        tabular output are compared against expected files from the test input
        directory; the two intermediate VCFs are only read back, not compared.
        """
        temp_dir = tempfile.gettempdir()
        input_dir = util.file.get_test_input_path(self)

        ref_fasta = os.path.join(input_dir, "ref-rabies-JQ685920.fasta")
        assembly_fasta = os.path.join(input_dir, "RBV16.fasta")
        isnv_calls = os.path.join(input_dir, "vphaser2.RBV16.mapped.txt.gz")

        # align sample to reference to create MSA
        msa_fasta = util.file.mkstempfname('.fasta')
        expected_msa_fasta = os.path.join(input_dir, "msa.fasta")
        args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"]
        args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        test.assert_equal_contents(self, msa_fasta, expected_msa_fasta)

        # merge (one) VCF to merged vcf
        merged_vcf = os.path.join(temp_dir, "merged.vcf.gz")
        expected_merged_vcf = os.path.join(input_dir, "merged.vcf.gz")
        args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta,
                "--strip_chr_version", "--parse_accession"]
        args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        # Readers are used as context managers so the underlying files are closed.
        with util.vcf.VcfReader(merged_vcf) as vcf:
            rows = list(vcf.get())
        with util.vcf.VcfReader(expected_merged_vcf) as expected_vcf:
            expected_rows = list(expected_vcf.get())
        # TODO(review): the exact comparison self.assertEqual(rows, expected_rows)
        # was disabled in the original test and is left disabled here.

        # run snpEff against merged VCF to predict SNP effects
        eff_vcf = os.path.join(temp_dir, "ann_eff.vcf.gz")
        expected_eff_vcf = os.path.join(input_dir, "ann_eff.vcf.gz")
        # NOTE(review): the last argument looks like a redacted e-mail address
        # option; confirm the intended value against the upstream test.
        args = [merged_vcf, "JQ685920", eff_vcf, "[email protected]"]
        args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        with util.vcf.VcfReader(eff_vcf) as vcf:
            rows = list(vcf.get())
        with util.vcf.VcfReader(expected_eff_vcf) as expected_vcf:
            expected_rows = list(expected_vcf.get())
        # TODO(review): self.assertEqual(rows, expected_rows) is disabled here too.

        # create tabular iSNV output
        eff_txt = os.path.join(temp_dir, "ann_eff.txt.gz")
        expected_eff_txt = os.path.join(input_dir, "ann_eff.txt.gz")
        args = [eff_vcf, eff_txt]
        args = intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
        for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),
                                       util.file.read_tabfile(expected_eff_txt)):
            for colout, colexpected in zip(outrow, expectedrow):
                # if both cells cast to float, perform approx comparison
                try:
                    actual_value = float(colout)
                    expected_value = float(colexpected)
                except ValueError:
                    # Non-numeric cells are compared as comma-separated lists,
                    # ignoring the order of the items.
                    self.assertEqual(sorted(colout.split(",")),
                                     sorted(colexpected.split(",")))
                else:
                    # Compare actual against expected; the original compared
                    # the actual value with itself, which could never fail.
                    self.assertAlmostEqual(actual_value, expected_value)
コード例 #3
0
 def run_and_get_vcf_rows(self):
     """Call intrahost.merge_to_vcf into a temp file and return the rows read back.

     Samples are processed in ``self.sample_order``; for each one an iSNV
     file is produced with ``self.dump_isnv_tmp_file`` and its genome FASTA
     is looked up in ``self.genomeFastas``.
     """
     vcf_path = util.file.mkstempfname('.vcf.gz')
     isnv_files = [self.dump_isnv_tmp_file(name) for name in self.sample_order]
     genome_fastas = [self.genomeFastas[name] for name in self.sample_order]
     intrahost.merge_to_vcf(self.ref, vcf_path, self.sample_order, isnv_files, genome_fastas)
     with util.vcf.VcfReader(vcf_path) as reader:
         return list(reader.get())
コード例 #4
0
ファイル: test_intrahost.py プロジェクト: dnanexus/viral-ngs
    def run_and_get_vcf_rows(self, retree=1):
        """Align the samples, merge their iSNVs into a temp VCF and return its rows.

        ``retree`` is forwarded unchanged to ``self.multi_align_samples``.
        The sequence ids passed to ``intrahost.merge_to_vcf`` are the values of
        ``self.sequence_order`` flattened into a single list.
        """
        vcf_path = util.file.mkstempfname('.vcf.gz')

        self.multi_align_samples(retree=retree)

        # Flatten the per-key id collections into one list, keeping their order.
        seq_ids = [seq_id for group in self.sequence_order.values() for seq_id in group]
        isnv_files = [self.dump_isnv_tmp_file(sample) for sample in self.sample_order]

        intrahost.merge_to_vcf(self.ref, vcf_path, seq_ids, isnv_files, self.alignedFastas)
        with util.vcf.VcfReader(vcf_path) as reader:
            return list(reader.get())
コード例 #5
0
    def run_and_get_vcf_rows(self, retree=1):
        """Return the rows of a VCF built by ``intrahost.merge_to_vcf``.

        The samples are first aligned via ``self.multi_align_samples`` (which
        receives ``retree``); the merged VCF is written to a temp file and
        then read back with ``util.vcf.VcfReader``.
        """
        merged_path = util.file.mkstempfname('.vcf.gz')

        self.multi_align_samples(retree=retree)

        # One flat list of ids drawn from every value in self.sequence_order.
        flat_ids = list(itertools.chain(*self.sequence_order.values()))
        per_sample_isnvs = [
            self.dump_isnv_tmp_file(sample_name)
            for sample_name in self.sample_order
        ]

        intrahost.merge_to_vcf(
            self.ref, merged_path, flat_ids, per_sample_isnvs, self.alignedFastas)

        with util.vcf.VcfReader(merged_path) as vcf_reader:
            merged_rows = list(vcf_reader.get())
        return merged_rows
コード例 #6
0
    def run_and_get_vcf_rows(self, retree=1, omit_samplenames=False):
        """Align the samples, merge their iSNVs into a temp VCF and return its rows.

        ``retree`` is forwarded to ``self.multi_align_samples``. When
        ``omit_samplenames`` is true, an empty list is passed to
        ``intrahost.merge_to_vcf`` in place of ``self.sample_order``; the iSNV
        files are still generated for every sample in ``self.sample_order``.
        """
        vcf_path = util.file.mkstempfname('.vcf.gz')

        self.multi_align_samples(retree=retree)

        # Only the sample-name argument differs between the two modes.
        sample_names = [] if omit_samplenames else self.sample_order
        isnv_files = [self.dump_isnv_tmp_file(sample) for sample in self.sample_order]
        intrahost.merge_to_vcf(self.ref, vcf_path, sample_names, isnv_files, self.alignedFastas)

        with util.vcf.VcfReader(vcf_path) as reader:
            return list(reader.get())
コード例 #7
0
ファイル: test_intrahost.py プロジェクト: xuwei684/viral-ngs
    def run_and_get_vcf_rows(self, retree=1, omit_samplenames=False):
        """Return the rows of a VCF built by ``intrahost.merge_to_vcf``.

        The samples are aligned first via ``self.multi_align_samples`` (which
        receives ``retree``). ``omit_samplenames`` selects whether
        ``self.sample_order`` or an empty list is handed to ``merge_to_vcf`` as
        the sample-name argument; all other arguments are the same either way.
        """
        merged_path = util.file.mkstempfname('.vcf.gz')

        self.multi_align_samples(retree=retree)

        if omit_samplenames:
            names_for_merge = []
        else:
            names_for_merge = self.sample_order

        per_sample_isnvs = [
            self.dump_isnv_tmp_file(sample_name)
            for sample_name in self.sample_order
        ]
        intrahost.merge_to_vcf(
            self.ref, merged_path, names_for_merge,
            per_sample_isnvs, self.alignedFastas)

        with util.vcf.VcfReader(merged_path) as vcf_reader:
            merged_rows = list(vcf_reader.get())
        return merged_rows