コード例 #1
0
 def test___get_query_name_to_best_record___one_query_name_with_several_records___several_covers_allele___difference_does_not_exceed_threshold___returns_empty_dict(
         self, *mocks):
     """Expect an empty mapping for the 50/60 mapping-quality mocks.

     The filter is built with ``mapping_quality_threshold=10``. Any extra
     positional mocks are accepted and ignored (presumably injected by
     patch decorators that are not visible in this snippet).
     """
     records_filter = MAPQSamRecordsFilter([], mapping_quality_threshold=10)
     candidate_records = [mapping_quality_50_mock, mapping_quality_60_mock]
     best_record_by_query_name = records_filter._get_query_name_to_best_record(
         candidate_records)
     assert best_record_by_query_name == {}
コード例 #2
0
 def test___get_first_and_second_records_with_highest_mapping_qualities_that_covers_the_allele___several_records_covers_the_allele(
         self, *mocks):
     """Expect the (60, 51) mapping-quality mocks as first and second best.

     The method is called with an empty list, so the records it ranks
     presumably come from a patched collaborator passed in via ``*mocks``
     (the patch decorators are not visible here).
     """
     records_filter = MAPQSamRecordsFilter([])
     first_and_second = records_filter._get_first_and_second_records_with_highest_mapping_qualities_that_covers_the_allele(
         [])
     assert first_and_second == (mapping_quality_60_mock,
                                 mapping_quality_51_mock)
コード例 #3
0
 def test___get_query_name_to_best_record___one_query_name_with_several_records___one_covers_allele(
         self, *mocks):
     """Expect ``query_name_1`` to map to the mapping-quality-40 mock.

     The filter uses ``mapping_quality_threshold=10`` and receives the 49
     and 40 mapping-quality mocks. Which record covers the allele is
     presumably decided by patched collaborators supplied via ``*mocks``.
     """
     records_filter = MAPQSamRecordsFilter([], mapping_quality_threshold=10)
     candidate_records = [mapping_quality_49_mock, mapping_quality_40_mock]
     best_record_by_query_name = records_filter._get_query_name_to_best_record(
         candidate_records)
     assert best_record_by_query_name == {
         "query_name_1": mapping_quality_40_mock
     }
コード例 #4
0
 def test___get_query_name_to_best_record___one_query_name_with_one_record___returns_the_only_record(
         self):
     """Expect a single input record to be reported for ``query_name_1``."""
     records_filter = MAPQSamRecordsFilter([], mapping_quality_threshold=10)
     only_record = [mapping_quality_49_mock]
     best_record_by_query_name = records_filter._get_query_name_to_best_record(
         only_record)
     assert best_record_by_query_name == {
         "query_name_1": mapping_quality_49_mock
     }
コード例 #5
0
    def test____get_only_records_that_cover_the_allele_core___no_classifications___returns_empty_list(
            self):
        """Expect an empty list back when the input list is empty."""
        records_filter = MAPQSamRecordsFilter([])
        covering_records = records_filter._get_only_records_that_cover_the_allele_core(
            [])
        assert covering_records == []
コード例 #6
0
 def test___get_record_with_highest_mapping_quality___several_records(self):
     """Expect the mapping-quality-60 mock to be picked from a mixed list.

     The input repeats the 49 and 51 mocks and places the 60 mock last.
     """
     records_filter = MAPQSamRecordsFilter([])
     candidate_records = [
         mapping_quality_49_mock,
         mapping_quality_50_mock,
         mapping_quality_51_mock,
         mapping_quality_49_mock,
         mapping_quality_49_mock,
         mapping_quality_51_mock,
         mapping_quality_60_mock,
     ]
     best_record = records_filter._get_record_with_highest_mapping_quality(
         candidate_records)
     assert best_record == mapping_quality_60_mock
コード例 #7
0
    def test___get_query_name_to_best_record___several_query_names_with_several_records(
        self,
        get_first_and_second_records_with_highest_mapping_qualities_that_covers_the_allele_mock
    ):
        """Check the best-record mapping across four query names.

        The mock parameter stands in for the first/second-best lookup and
        is driven by a side effect keyed on the query name of the first
        record it receives. Expected outcome: query names 1, 3 and 4 are
        kept and query name 2 is absent.
        """
        # Extra records for query names 2-4; query_name_1 reuses the
        # module-level mapping-quality mocks.
        q2_mapq_60 = Mock(query_name="query_name_2", mapping_quality=60)
        q2_mapq_51 = Mock(query_name="query_name_2", mapping_quality=51)
        q3_mapq_30 = Mock(query_name="query_name_3", mapping_quality=30)
        q3_mapq_0 = Mock(query_name="query_name_3", mapping_quality=0)
        q4_mapq_32 = Mock(query_name="query_name_4", mapping_quality=32)
        q4_mapq_35 = Mock(query_name="query_name_4", mapping_quality=35)

        first_and_second_by_query_name = {
            # query_name_1 - exceeds threshold
            "query_name_1": (mapping_quality_60_mock, mapping_quality_49_mock),
            # query_name_2 - does not exceed threshold
            "query_name_2": (q2_mapq_60, q2_mapq_51),
            # query_name_3 - only one record covers the allele
            "query_name_3": (q3_mapq_30, None),
            # query_name_4 - no record covers the allele
            "query_name_4": (None, None),
        }

        def first_and_second_for(records):
            # Unknown query names yield None, like falling off an if/elif chain.
            return first_and_second_by_query_name.get(records[0].query_name)

        get_first_and_second_records_with_highest_mapping_qualities_that_covers_the_allele_mock.side_effect = first_and_second_for

        records_filter = MAPQSamRecordsFilter([],
                                              mapping_quality_threshold=10)
        all_records = [
            mapping_quality_49_mock,
            mapping_quality_40_mock,
            mapping_quality_60_mock,
            mapping_quality_40_mock,
            mapping_quality_40_mock,
            q2_mapq_51,
            q2_mapq_60,
            q3_mapq_0,
            q3_mapq_30,
            q4_mapq_32,
            q4_mapq_35,
        ]
        best_record_by_query_name = records_filter._get_query_name_to_best_record(
            all_records)
        assert best_record_by_query_name == {
            "query_name_1": mapping_quality_60_mock,
            "query_name_3": q3_mapq_30,
            "query_name_4": q4_mapq_32
        }
コード例 #8
0
    def test____get_only_records_that_cover_the_allele_core___several_classifications_only_two_cover_allele___returns_two_records(
            self):
        """Expect only the two covering entries, reported as record_1/record_2.

        Six entries are passed in: four non-covering mocks and two covering
        mocks (at positions 3 and 6).
        """
        records_filter = MAPQSamRecordsFilter([])

        mixed_records = [
            record_do_not_cover_allele_mock,
            record_do_not_cover_allele_mock,
            record_cover_allele_mock_1,
            record_do_not_cover_allele_mock,
            record_do_not_cover_allele_mock,
            record_cover_allele_mock_2,
        ]
        covering_records = records_filter._get_only_records_that_cover_the_allele_core(
            mixed_records)
        assert covering_records == ["record_1", "record_2"]
コード例 #9
0
 def test___constructor___one_sam_record___records_to_keep_has_sam_record(
         self, get_query_name_to_best_record_mock):
     """Check that the constructor stores the best record it is handed.

     ``get_query_name_to_best_record_mock`` stands in for the best-record
     lookup (presumably patched in by a decorator not visible here). The
     test expects ``records_to_keep`` to be ``{"best_record_1"}`` and the
     lookup to have been called exactly once with the constructor's input.
     """
     records_mock = Mock()
     mapq_sam_records_filter = MAPQSamRecordsFilter(records_mock)
     assert mapq_sam_records_filter.records_to_keep == {"best_record_1"}
     # `mock.called_once_with(...)` is not an assertion: it yields a truthy
     # child mock (or raises AttributeError on Python 3.12+), so the call
     # was never checked. `assert_called_once_with` performs the real check.
     get_query_name_to_best_record_mock.assert_called_once_with(records_mock)
コード例 #10
0
 def test___constructor___no_sam_records___records_to_keep_is_empty(
         self, get_query_name_to_best_record_mock):
     """Check that the constructor keeps nothing when no best record exists.

     ``get_query_name_to_best_record_mock`` stands in for the best-record
     lookup (presumably patched in by a decorator not visible here). The
     test expects ``records_to_keep`` to be an empty set and the lookup to
     have been called exactly once with the constructor's input.
     """
     records_mock = Mock()
     mapq_sam_records_filter = MAPQSamRecordsFilter(records_mock)
     assert mapq_sam_records_filter.records_to_keep == set()
     # `mock.called_once_with(...)` is not an assertion: it yields a truthy
     # child mock (or raises AttributeError on Python 3.12+), so the call
     # was never checked. `assert_called_once_with` performs the real check.
     get_query_name_to_best_record_mock.assert_called_once_with(records_mock)
# setup: pull inputs, wildcards and outputs off the snakemake object
sam_filepath = snakemake.input.variant_call_probeset_mapped_to_ref
mask_filepath = snakemake.input.mask
sample_id = snakemake.wildcards.sample_id
tool = snakemake.wildcards.tool
variant_call_precision_report = snakemake.output.variant_call_precision_report
nb_of_records_removed_with_mapq_sam_records_filter_filepath = (
    snakemake.output.nb_of_records_removed_with_mapq_sam_records_filter_filepath
)

# API usage
# Load every record from the SAM file into memory before filtering.
with pysam.AlignmentFile(sam_filepath) as sam:
    records = list(sam)

logging.info("Applying MAPQ SAM records filter")
nb_of_records_before_mapq_sam_records_filter = len(records)
mapq_sam_records_filter = MAPQSamRecordsFilter(records)
records = mapq_sam_records_filter.filter_records(records)
nb_of_records_after_mapq_sam_records_filter = len(records)
nb_of_records_removed_with_mapq_sam_records_filter = (
    nb_of_records_before_mapq_sam_records_filter
    - nb_of_records_after_mapq_sam_records_filter
)
# Guard the proportion against an empty input file (division by zero).
if nb_of_records_before_mapq_sam_records_filter > 0:
    nb_of_records_removed_with_mapq_sam_records_filter_proportion = (
        nb_of_records_removed_with_mapq_sam_records_filter
        / nb_of_records_before_mapq_sam_records_filter
    )
else:
    nb_of_records_removed_with_mapq_sam_records_filter_proportion = 0

nb_of_records_removed_with_mapq_sam_records_filter_df = pd.DataFrame({
    "tool": [tool],
    "nb_of_records_before_mapq_sam_records_filter":
    [nb_of_records_before_mapq_sam_records_filter],
    "nb_of_records_after_mapq_sam_records_filter":
    [nb_of_records_after_mapq_sam_records_filter],
    "nb_of_records_removed_with_mapq_sam_records_filter":
    [nb_of_records_removed_with_mapq_sam_records_filter],
    "nb_of_records_removed_with_mapq_sam_records_filter_proportion":
    [nb_of_records_removed_with_mapq_sam_records_filter_proportion]
コード例 #12
0
 def test___get_record_with_highest_mapping_quality___no_records___returns_None(
         self):
     """Expect ``None`` when there are no records to choose from."""
     records_filter = MAPQSamRecordsFilter([])
     best_record = records_filter._get_record_with_highest_mapping_quality([])
     assert best_record is None
コード例 #13
0
 def test___record_should_be_filtered_out___record_is_NOT_in_records_to_keep___should_be_filtered_out(
         self, *mocks):
     """Expect a record to be flagged for removal by a filter built from no records.

     Any extra positional mocks are accepted and ignored (presumably
     injected by patch decorators that are not visible in this snippet).
     """
     records_filter = MAPQSamRecordsFilter([])
     should_be_dropped = records_filter.record_should_be_filtered_out(
         mapping_quality_50_mock)
     assert should_be_dropped
コード例 #14
0
 def test___get_query_name_to_best_record___no_records___return_empty_dict(
         self):
     """Expect an empty mapping when no records are supplied."""
     records_filter = MAPQSamRecordsFilter([])
     best_record_by_query_name = records_filter._get_query_name_to_best_record(
         [])
     assert best_record_by_query_name == {}
コード例 #15
0
 def test___get_record_with_highest_mapping_quality___one_record___returns_given_record(
         self):
     """Expect the sole record to be returned as the highest-quality one."""
     records_filter = MAPQSamRecordsFilter([])
     single_record = [mapping_quality_49_mock]
     best_record = records_filter._get_record_with_highest_mapping_quality(
         single_record)
     assert best_record == mapping_quality_49_mock