def test_snpeff(self):
    """Run align -> merge-to-VCF -> snpEff -> iSNV table on the test inputs.

    Each stage is driven through its argparse parser (``parse_args`` followed
    by ``func_main``). The MSA is compared against the expected file, and the
    final tabular output is compared cell by cell against ``ann_eff.txt.gz``:
    cells that parse as floats are compared approximately, all other cells
    are compared as order-insensitive comma-separated lists.
    """
    temp_dir = tempfile.gettempdir()
    input_dir = util.file.get_test_input_path(self)
    ref_fasta = os.path.join(input_dir, "ref-rabies-JQ685920.fasta")
    assembly_fasta = os.path.join(input_dir, "RBV16.fasta")
    isnv_calls = os.path.join(input_dir, "vphaser2.RBV16.mapped.txt.gz")

    # align sample to reference to create MSA
    msa_fasta = util.file.mkstempfname('.fasta')
    expected_msa_fasta = os.path.join(input_dir, "msa.fasta")
    args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"]
    args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args)
    args.func_main(args)
    test.assert_equal_contents(self, msa_fasta, expected_msa_fasta)

    # merge (one) VCF to merged vcf
    merged_vcf = os.path.join(temp_dir, "merged.vcf.gz")
    expected_merged_vcf = os.path.join(input_dir, "merged.vcf.gz")
    args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta,
            "--strip_chr_version", "--parse_accession"]
    args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args)
    args.func_main(args)
    # Readers are opened as context managers so their handles are released.
    with util.vcf.VcfReader(merged_vcf) as vcf:
        rows = list(vcf.get())
    with util.vcf.VcfReader(expected_merged_vcf) as expected_vcf:
        expected_rows = list(expected_vcf.get())
    # NOTE(review): the exact row comparison below is disabled; both files
    # are still read in full.
    # self.assertEqual(rows, expected_rows)

    # run snpEff against merged VCF to predict SNP effects
    eff_vcf = os.path.join(temp_dir, "ann_eff.vcf.gz")
    expected_eff_vcf = os.path.join(input_dir, "ann_eff.vcf.gz")
    args = [merged_vcf, "JQ685920", eff_vcf, "[email protected]"]
    # Output capture is switched off while the snpEff command runs.
    with self.capsys.disabled():
        args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args)
        args.func_main(args)
    with util.vcf.VcfReader(eff_vcf) as vcf:
        rows = list(vcf.get())
    with util.vcf.VcfReader(expected_eff_vcf) as expected_vcf:
        expected_rows = list(expected_vcf.get())
    # NOTE(review): exact row comparison disabled here as well.
    # self.assertEqual(rows, expected_rows)

    # create tabular iSNV output
    eff_txt = os.path.join(temp_dir, "ann_eff.txt.gz")
    expected_eff_txt = os.path.join(input_dir, "ann_eff.txt.gz")
    args = [eff_vcf, eff_txt]
    args = intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args)
    args.func_main(args)

    # NOTE(review): zip() stops at the shorter input, so surplus rows or
    # columns on either side are not compared.
    for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),
                                   util.file.read_tabfile(expected_eff_txt)):
        for colout, colexpected in zip(outrow, expectedrow):
            # if it casts to float, perform approx comparison
            try:
                f1 = float(colout)
                f2 = float(colexpected)
            except ValueError:
                # Non-numeric cell: compare as an unordered comma-separated list.
                self.assertEqual(sorted(colout.split(",")),
                                 sorted(colexpected.split(",")))
            else:
                # Compare actual against expected (previously f1 against itself).
                self.assertAlmostEqual(f1, f2)
def test_snpeff(self):
    """Run align -> merge-to-VCF -> snpEff -> iSNV table on the test inputs.

    Each stage is driven through its argparse parser (``parse_args`` followed
    by ``func_main``). The MSA is compared against the expected file, and the
    final tabular output is compared cell by cell against ``ann_eff.txt.gz``:
    cells that parse as floats are compared approximately, all other cells
    are compared as order-insensitive comma-separated lists.
    """
    temp_dir = tempfile.gettempdir()
    input_dir = util.file.get_test_input_path(self)
    ref_fasta = os.path.join(input_dir, "ref-rabies-JQ685920.fasta")
    assembly_fasta = os.path.join(input_dir, "RBV16.fasta")
    isnv_calls = os.path.join(input_dir, "vphaser2.RBV16.mapped.txt.gz")

    # align sample to reference to create MSA
    msa_fasta = util.file.mkstempfname('.fasta')
    expected_msa_fasta = os.path.join(input_dir, "msa.fasta")
    args = [ref_fasta, assembly_fasta, msa_fasta, "--localpair", "--preservecase"]
    args = interhost.parser_align_mafft(argparse.ArgumentParser()).parse_args(args)
    args.func_main(args)
    test.assert_equal_contents(self, msa_fasta, expected_msa_fasta)

    # merge (one) VCF to merged vcf
    merged_vcf = os.path.join(temp_dir, "merged.vcf.gz")
    expected_merged_vcf = os.path.join(input_dir, "merged.vcf.gz")
    args = [ref_fasta, merged_vcf, "--isnvs", isnv_calls, "--alignments", msa_fasta,
            "--strip_chr_version", "--parse_accession"]
    args = intrahost.parser_merge_to_vcf(argparse.ArgumentParser()).parse_args(args)
    args.func_main(args)
    # Readers are opened as context managers so their handles are released.
    with util.vcf.VcfReader(merged_vcf) as vcf:
        rows = list(vcf.get())
    with util.vcf.VcfReader(expected_merged_vcf) as expected_vcf:
        expected_rows = list(expected_vcf.get())
    # NOTE(review): the exact row comparison below is disabled; both files
    # are still read in full.
    # self.assertEqual(rows, expected_rows)

    # run snpEff against merged VCF to predict SNP effects
    eff_vcf = os.path.join(temp_dir, "ann_eff.vcf.gz")
    expected_eff_vcf = os.path.join(input_dir, "ann_eff.vcf.gz")
    args = [merged_vcf, "JQ685920", eff_vcf, "[email protected]"]
    args = interhost.parser_snpEff(argparse.ArgumentParser()).parse_args(args)
    args.func_main(args)
    with util.vcf.VcfReader(eff_vcf) as vcf:
        rows = list(vcf.get())
    with util.vcf.VcfReader(expected_eff_vcf) as expected_vcf:
        expected_rows = list(expected_vcf.get())
    # NOTE(review): exact row comparison disabled here as well.
    # self.assertEqual(rows, expected_rows)

    # create tabular iSNV output
    eff_txt = os.path.join(temp_dir, "ann_eff.txt.gz")
    expected_eff_txt = os.path.join(input_dir, "ann_eff.txt.gz")
    args = [eff_vcf, eff_txt]
    args = intrahost.parser_iSNV_table(argparse.ArgumentParser()).parse_args(args)
    args.func_main(args)

    # NOTE(review): zip() stops at the shorter input, so surplus rows or
    # columns on either side are not compared.
    for outrow, expectedrow in zip(util.file.read_tabfile(eff_txt),
                                   util.file.read_tabfile(expected_eff_txt)):
        for colout, colexpected in zip(outrow, expectedrow):
            # if it casts to float, perform approx comparison
            try:
                f1 = float(colout)
                f2 = float(colexpected)
            except ValueError:
                # Non-numeric cell: compare as an unordered comma-separated list.
                self.assertEqual(sorted(colout.split(",")),
                                 sorted(colexpected.split(",")))
            else:
                # Compare actual against expected (previously f1 against itself).
                self.assertAlmostEqual(f1, f2)
def run_and_get_vcf_rows(self):
    """Merge this fixture's iSNV calls into a temporary VCF and return its rows.

    Calls ``intrahost.merge_to_vcf`` with ``self.ref``, the samples in
    ``self.sample_order``, one dumped iSNV file per sample and the matching
    entries of ``self.genomeFastas``, then reads the written VCF back.

    Returns:
        list: every row yielded by ``VcfReader.get()`` for the output file.
    """
    vcf_path = util.file.mkstempfname('.vcf.gz')
    isnv_files = [self.dump_isnv_tmp_file(name) for name in self.sample_order]
    genome_fastas = [self.genomeFastas[name] for name in self.sample_order]
    intrahost.merge_to_vcf(
        self.ref, vcf_path, self.sample_order, isnv_files, genome_fastas)
    with util.vcf.VcfReader(vcf_path) as reader:
        return list(reader.get())
def run_and_get_vcf_rows(self, retree=1):
    """Align the samples, merge their iSNV calls into a VCF and return its rows.

    Args:
        retree: forwarded to ``self.multi_align_samples``.

    Returns:
        list: every row yielded by ``VcfReader.get()`` for the output file.
    """
    vcf_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)
    # Flatten the per-key id lists of self.sequence_order into one list.
    seq_ids = [
        seq_id
        for id_group in self.sequence_order.values()
        for seq_id in id_group
    ]
    isnv_files = [self.dump_isnv_tmp_file(name) for name in self.sample_order]
    intrahost.merge_to_vcf(
        self.ref, vcf_path, seq_ids, isnv_files, self.alignedFastas)
    with util.vcf.VcfReader(vcf_path) as reader:
        return list(reader.get())
def run_and_get_vcf_rows(self, retree=1):
    """Align the samples, merge their iSNV calls into a VCF and return its rows.

    Args:
        retree: forwarded to ``self.multi_align_samples``.

    Returns:
        list: every row yielded by ``VcfReader.get()`` for the output file.
    """
    merged_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)

    # Concatenate the id groups stored in self.sequence_order, in dict order.
    all_seq_ids = []
    for id_group in self.sequence_order.values():
        all_seq_ids.extend(id_group)

    per_sample_isnvs = [
        self.dump_isnv_tmp_file(sample) for sample in self.sample_order
    ]
    intrahost.merge_to_vcf(
        self.ref,
        merged_path,
        all_seq_ids,
        per_sample_isnvs,
        self.alignedFastas,
    )

    with util.vcf.VcfReader(merged_path) as vcf_in:
        merged_rows = list(vcf_in.get())
    return merged_rows
def run_and_get_vcf_rows(self, retree=1, omit_samplenames=False):
    """Align the samples, merge their iSNV calls into a VCF and return its rows.

    Args:
        retree: forwarded to ``self.multi_align_samples``.
        omit_samplenames: when true, an empty sample list is handed to
            ``intrahost.merge_to_vcf`` instead of ``self.sample_order``.

    Returns:
        list: every row yielded by ``VcfReader.get()`` for the output file.
    """
    vcf_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)
    # Only the sample-name argument differs between the two modes.
    sample_names = [] if omit_samplenames else self.sample_order
    isnv_files = [self.dump_isnv_tmp_file(name) for name in self.sample_order]
    intrahost.merge_to_vcf(
        self.ref, vcf_path, sample_names, isnv_files, self.alignedFastas)
    with util.vcf.VcfReader(vcf_path) as reader:
        return list(reader.get())
def run_and_get_vcf_rows(self, retree=1, omit_samplenames=False):
    """Align the samples, merge their iSNV calls into a VCF and return its rows.

    Args:
        retree: forwarded to ``self.multi_align_samples``.
        omit_samplenames: when true, an empty sample list is handed to
            ``intrahost.merge_to_vcf`` instead of ``self.sample_order``.

    Returns:
        list: every row yielded by ``VcfReader.get()`` for the output file.
    """
    merged_path = util.file.mkstempfname('.vcf.gz')
    self.multi_align_samples(retree=retree)

    # Pick the sample-name argument; everything else is shared by both modes.
    if omit_samplenames:
        names_for_merge = []
    else:
        names_for_merge = self.sample_order

    per_sample_isnvs = [
        self.dump_isnv_tmp_file(sample) for sample in self.sample_order
    ]
    intrahost.merge_to_vcf(
        self.ref,
        merged_path,
        names_for_merge,
        per_sample_isnvs,
        self.alignedFastas,
    )

    with util.vcf.VcfReader(merged_path) as vcf_in:
        merged_rows = list(vcf_in.get())
    return merged_rows