def test_no_variants_returns_ref_chroms(self, mock_var_file, mock_load_fasta):
    """Check the string output when the variant stream yields no records.

    The mocked variant file returns an empty iterator from ``fetch`` and the
    mocked fasta loader returns ``self.chroms``; the converter's string form
    must then equal ``"AGCAGCCCCGGG"`` (per the test name, the reference
    chromosomes with nothing inserted).

    NOTE(review): ``mock_var_file`` / ``mock_load_fasta`` are presumably
    injected by ``patch`` decorators outside this view -- confirm.
    """
    no_records = []
    mock_load_fasta.return_value = self.chroms
    mock_var_file.return_value.fetch.return_value = iter(no_records)

    prg_builder = Vcf_to_prg("", "", "")
    produced = prg_builder._get_string()
    self.assertEqual("AGCAGCCCCGGG", produced)
def test_one_variant_chroms_with_no_vars_in_same_order(
    self, mock_var_file, mock_load_fasta
):
    """Check output for a single multi-allelic record on chromosome "ref3".

    Only one record is supplied (REF "G", ALTs "CAAA" and "CA"); the expected
    string shows that record rendered as ``5G6CAAA6CA6`` with the remaining
    sequence around it left untouched.
    """
    mock_load_fasta.return_value = self.chroms
    lone_record = _MockVcfRecord(2, "G", ["CAAA", "CA"], chrom="ref3")
    mock_var_file.return_value.fetch.return_value = iter([lone_record])

    prg_builder = Vcf_to_prg("", "", "")
    produced = prg_builder._get_string()
    self.assertEqual("AGCAGCCCCG5G6CAAA6CA6G", produced)
def test_filter_pass_record_kept(self, mock_var_file, mock_load_fasta):
    """Check that a record built without an explicit filter shows up in the output.

    The single record (REF "A", ALT "G" on "JAC") must appear as the site
    ``5A6G6`` inside the expected string ``"A5A6G6C"``.
    """
    # Default filter: PASS
    kept_record = _MockVcfRecord(2, "A", "G", chrom="JAC")
    mock_load_fasta.return_value = self.chroms
    mock_var_file.return_value.fetch.return_value = iter([kept_record])

    prg_builder = Vcf_to_prg("", "", "")
    produced = prg_builder._get_string()
    self.assertEqual(produced, "A5A6G6C")
def test_adjacent_snps_kept(self, mock_var_file, mock_load_fasta):
    """Check that two SNPs at consecutive positions on "ref2" both become sites.

    The expected string contains two back-to-back sites, ``5C6G6`` followed
    immediately by ``7C8A8``.
    """
    first_snp = _MockVcfRecord(1, "C", ["G"], chrom="ref2")
    second_snp = _MockVcfRecord(2, "C", ["A"], chrom="ref2")
    mock_var_file.return_value.fetch.return_value = iter([first_snp, second_snp])
    mock_load_fasta.return_value = self.chroms

    prg_builder = Vcf_to_prg("", "", "")
    produced = prg_builder._get_string()
    self.assertEqual("AGCAGC5C6G67C8A8CGGG", produced)
def test_two_snps_same_chrom(self, mock_var_file, mock_load_fasta):
    """Check output for two non-adjacent records on chromosome "ref1".

    The first record has one ALT, the second has two; the expected string
    shows them as sites ``5A6G6`` and ``7C8T8G8`` separated by one
    reference base.
    """
    variant_records = [
        _MockVcfRecord(1, "A", "G", chrom="ref1"),
        _MockVcfRecord(3, "C", ["T", "G"], chrom="ref1"),
    ]
    mock_load_fasta.return_value = self.chroms
    mock_var_file.return_value.fetch.return_value = iter(variant_records)

    prg_builder = Vcf_to_prg("", "", "")
    produced = prg_builder._get_string()
    self.assertEqual("5A6G6G7C8T8G8AGCCCCGGG", produced)
def test_snp_inside_del(self, mock_var_file, mock_load_fasta):
    """Check output when a SNP falls inside the span of an earlier deletion.

    The first record replaces "TTTT" with "T" starting at position 1; the
    second is a SNP at position 2, i.e. within the deleted stretch. The
    expected string contains only the deletion site (``5TTTT6T6``); the SNP
    does not appear.
    """
    recs = [
        _MockVcfRecord(1, "TTTT", ["T"], chrom="ref1"),
        _MockVcfRecord(2, "T", ["C"], chrom="ref1"),
    ]
    mock_var_file.return_value.fetch.return_value = iter(recs)
    mock_load_fasta.return_value = self.chroms
    converter = Vcf_to_prg("", "", "")
    self.assertEqual("5TTTT6T6CCC", converter._get_string())


def test_snps_at_same_position(self, mock_var_file, mock_load_fasta):
    """Check output when two SNP records share the same position.

    Both records sit at position 2 with REF "T". The expected string contains
    a single site built from the first record (``5T6G6``); the second
    record's ALT "C" does not appear.
    """
    recs = [
        _MockVcfRecord(2, "T", ["G"], chrom="ref1"),
        _MockVcfRecord(2, "T", ["C"], chrom="ref1"),
    ]
    mock_var_file.return_value.fetch.return_value = iter(recs)
    mock_load_fasta.return_value = self.chroms
    converter = Vcf_to_prg("", "", "")
    self.assertEqual("T5T6G6TTCCC", converter._get_string())
def test_one_ins_and_one_del_diff_chroms(self, mock_var_file, mock_load_fasta):
    """Check output for an insertion on "ref1" and a deletion on "ref2".

    The expected string shows the insertion as ``5C6CGG6`` and the deletion
    as ``7CCC8C8``, each embedded in its own chromosome's sequence.
    """
    insertion = _MockVcfRecord(3, "C", ["CGG"], chrom="ref1")
    deletion = _MockVcfRecord(1, "CCC", ["C"], chrom="ref2")
    mock_var_file.return_value.fetch.return_value = iter([insertion, deletion])
    mock_load_fasta.return_value = self.chroms

    prg_builder = Vcf_to_prg("", "", "")
    produced = prg_builder._get_string()
    self.assertEqual("AG5C6CGG6AGC7CCC8C8GGG", produced)
def build_from_vcfs(report, action, build_paths, args):
    """Calls utility that converts a vcf and fasta reference into a linear prg.

    Steps:
      1. Run (or explicitly skip, when ``args.no_vcf_clustering`` is set) the
         vcf record clustering step.
      2. Convert ``build_paths.built_vcf`` plus ``build_paths.ref`` into a PRG
         written via ``Vcf_to_prg._write_bytes()``.
      3. Verify that the number of record lines in the built vcf equals the
         number of variant sites the converter produced.

    :param report: passed through to the clustering step.
    :param action: label of the current build step; used in the log message.
    :param build_paths: object exposing ``built_vcf``, ``ref`` and ``prg``.
    :param args: object exposing the boolean ``no_vcf_clustering``.
    :raises AssertionError: if the vcf record count and the converter's site
        count disagree.
    """
    if args.no_vcf_clustering:
        _skip_cluster_vcf_records(report, "skip_vcf_record_clustering", build_paths)
    else:
        # We also do this if only a single one is provided, to deal with overlapping records.
        cluster_vcf_records(report, "vcf_record_clustering", build_paths)

    built_vcf = build_paths.built_vcf
    log.info(f"Running {action} on {built_vcf}")

    converter = Vcf_to_prg(built_vcf, build_paths.ref, build_paths.prg, mode="normal")
    converter._write_bytes()

    num_recs_in_vcf = _count_vcf_record_lines(built_vcf)
    # Explicit check instead of `assert cond, log.error(...)`: a bare assert is
    # stripped under `python -O`, and the value of the logging call gave the
    # raised AssertionError no usable message.
    if num_recs_in_vcf != converter.num_sites:
        message = (
            f"Mismatch between number of vcf records in {built_vcf} "
            f"({num_recs_in_vcf}) and number of variant sites in "
            f"PRG string ({converter.num_sites}).\n"
            f"Please report this to developers."
        )
        log.error(message)
        raise AssertionError(message)
def test_nonACGT_fails_to_convert(self, mock_var_file, mock_load_fasta):
    """Check that ``_to_bytes`` raises ``ValueError`` when given "N".

    The converter is built over an empty record stream and an empty
    reference mapping, so only the conversion call itself is exercised.
    """
    mock_load_fasta.return_value = {}
    mock_var_file.return_value.fetch.return_value = iter([])

    prg_builder = Vcf_to_prg("", "", "")
    self.assertRaises(ValueError, prg_builder._to_bytes, "N")
def test_filter_nonpass_record_skipped(self, mock_var_file, mock_load_fasta):
    """Check that a record carrying a non-PASS filter adds no site to the PRG.

    The single record is created with ``filter={"LOW_QUAL": ""}``. If it is
    skipped, the converter's string output must be the reference alone, with
    no site markers.
    """
    recs = [_MockVcfRecord(2, "A", "G", filter={"LOW_QUAL": ""})]
    mock_var_file.return_value.fetch.return_value = iter(recs)
    mock_load_fasta.return_value = self.chroms
    converter = Vcf_to_prg("", "", "")
    # Without this assertion the test passed regardless of filtering behaviour.
    # NOTE(review): assumes self.chroms maps chromosome name -> sequence string
    # in output order, so the untouched reference is the concatenation of its
    # values -- confirm against the fixture.
    expected = "".join(self.chroms.values())
    self.assertEqual(expected, converter._get_string())
def test_integer_representation(self, mock_var_file, mock_load_fasta):
    """Check ``_get_ints()`` for the shared ``self.recs`` / ``self.chroms`` fixtures.

    The converter's integer form must match the expected list exactly,
    element by element.
    """
    mock_load_fasta.return_value = self.chroms
    mock_var_file.return_value.fetch.return_value = iter(self.recs)

    expected_ints = [5, 1, 6, 3, 6, 2, 1, 2, 7, 1, 8, 1, 1, 1, 8, 1]
    prg_builder = Vcf_to_prg("", "", "")
    self.assertEqual(expected_ints, prg_builder._get_ints())
def test_legacy_representation(self, mock_var_file, mock_load_fasta):
    """Check the string output when the converter is built with ``mode="legacy"``.

    Uses the shared ``self.recs`` / ``self.chroms`` fixtures. In the expected
    string each site is closed by the same marker that opened it
    (``5...5``, ``7...7``).
    """
    mock_load_fasta.return_value = self.chroms
    mock_var_file.return_value.fetch.return_value = iter(self.recs)

    legacy_builder = Vcf_to_prg("", "", "", mode="legacy")
    produced = legacy_builder._get_string()
    self.assertEqual("5A6G5CAC7A8AAA7A", produced)
def _run(self, mode="normal", check=False):
    """Build a converter from this object's files and write its string output.

    :param mode: forwarded to ``Vcf_to_prg`` as its ``mode`` keyword.
    :param check: accepted but not used by this body.
    """
    vcf_path = str(self.vcf_file)
    ref_path = str(self.ref_file)
    prg_builder = Vcf_to_prg(vcf_path, ref_path, self.outfile_prefix, mode=mode)
    prg_builder._write_string()