Esempio n. 1
0
    def test_add_tag_nullInputsProduceNullZScores(self):
        """A "." input value yields a "." z-score for that sample.

        The expected "-1.0" for SA is consistent with statistics computed
        over the non-null values only (4 and 8: mean 6, population stdev 2).
        """
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "."}})
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "."}, "SB": {"X": "8"}})
        reader = MockVcfReader(records=[rec1, rec2])
        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        tag.add_tag_values(rec1)

        # assertEqual replaces the deprecated assertEquals alias.
        sampleA_tag_values = rec1.sample_tag_values["SA"]
        self.assertEqual("-1.0", sampleA_tag_values["ZScoreX"])
        sampleB_tag_values = rec1.sample_tag_values["SB"]
        self.assertEqual(".", sampleB_tag_values["ZScoreX"])
Esempio n. 2
0
    def test_add_tag_zeroInputsIncluded(self):
        """Zero-valued inputs take part in the statistics and get a z-score.

        The expected values match a population of 4, 0, 0, 8 (mean 3,
        population stdev sqrt(11)), i.e. the "0" values are not treated as
        missing.
        """
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "0"}})
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "0"}, "SB": {"X": "8"}})
        reader = MockVcfReader(records=[rec1, rec2])
        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        tag.add_tag_values(rec1)

        # assertEqual replaces the deprecated assertEquals alias.
        sampleA_tag_values = rec1.sample_tag_values["SA"]
        self.assertEqual("0.3015", sampleA_tag_values["ZScoreX"])
        sampleB_tag_values = rec1.sample_tag_values["SB"]
        self.assertEqual("-0.9045", sampleB_tag_values["ZScoreX"])
Esempio n. 3
0
    def test_init_createsAllTags(self):
        """ZScoreCaller builds two tags: the AF and the DP z-score tags."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_AF_RANGE": "4"},
                "SB": {"JQ_SUMMARY_AF_RANGE": "7"},
            })
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_AF_RANGE": "13"},
                "SB": {"JQ_SUMMARY_AF_RANGE": "16"},
            })
        reader = MockVcfReader(records=[rec1, rec2])

        caller = zscore_caller.ZScoreCaller(reader)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(2, len(caller._tags))
        tag_ids = [tag.tag_id for tag in caller._tags]
        self.assertIn("JQ_SUMMARY_AF_ZSCORE", tag_ids)
        self.assertIn("JQ_SUMMARY_DP_ZSCORE", tag_ids)
Esempio n. 4
0
    def test_add_tag_doesNothingIfNoStdev(self):
        """With identical inputs (stdev 0) no z-score tag is added."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "4"}})
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "4"}})
        reader = MockVcfReader(records=[rec1, rec2])
        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        tag.add_tag_values(rec1)

        # Use assertEqual throughout; the original mixed it with the
        # deprecated assertEquals alias.
        self.assertEqual(0, tag._stdev)
        # Each sample must still carry only its original "X" tag.
        self.assertEqual(["X"], sorted(rec1.sample_tag_values["SA"].keys()))
        self.assertEqual(["X"], sorted(rec1.sample_tag_values["SB"].keys()))
Esempio n. 5
0
    def test_add_tag(self):
        """_DepthZScoreTag writes JQ_SUMMARY_DP_ZSCORE from JQ_SUMMARY_DP_RANGE.

        Expected values match a population of 4, 7, 13, 16 (mean 10,
        population stdev sqrt(22.5)), formatted to four decimals.
        """
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_DP_RANGE": "4"},
                "SB": {"JQ_SUMMARY_DP_RANGE": "7"},
            })
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_DP_RANGE": "13"},
                "SB": {"JQ_SUMMARY_DP_RANGE": "16"},
            })
        reader = MockVcfReader(records=[rec1, rec2])
        tag = zscore_caller._DepthZScoreTag(reader)

        tag.add_tag_values(rec1)

        # assertEqual replaces the deprecated assertEquals alias.
        sampleA_tag_values = rec1.sample_tag_values["SA"]
        self.assertEqual("-1.2649",
                         sampleA_tag_values["JQ_SUMMARY_DP_ZSCORE"])
        sampleB_tag_values = rec1.sample_tag_values["SB"]
        self.assertEqual("-0.6325",
                         sampleB_tag_values["JQ_SUMMARY_DP_ZSCORE"])
Esempio n. 6
0
    def test_init_setsPopulationStatistics(self):
        """Constructing the tag computes mean and stdev over all sample values."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
        reader = MockVcfReader(records=[rec1, rec2])

        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        values = [4, 7, 13, 16]
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias;
        # the third positional argument is the number of decimal places.
        self.assertAlmostEqual(mean(values), tag._mean,
                               _ZScoreTag._MAX_PRECISION)
        self.assertAlmostEqual(stdev(values), tag._stdev,
                               _ZScoreTag._MAX_PRECISION)
Esempio n. 7
0
    def test_add_tag(self):
        """add_tag_values writes a four-decimal z-score for every sample.

        Expected values match a population of 4, 7, 13, 16 (mean 10,
        population stdev sqrt(22.5)).
        """
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
        reader = MockVcfReader(records=[rec1, rec2])
        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        tag.add_tag_values(rec1)

        # assertEqual replaces the deprecated assertEquals alias.
        sampleA_tag_values = rec1.sample_tag_values["SA"]
        self.assertEqual("-1.2649", sampleA_tag_values["ZScoreX"])
        sampleB_tag_values = rec1.sample_tag_values["SB"]
        self.assertEqual("-0.6325", sampleB_tag_values["ZScoreX"])
Esempio n. 8
0
    def test_vcf_records_newTagsPresent(self):
        """Records from the Strelka reader carry the added JQ_STRELKA tags.

        The first record (DP2 only) must gain the DP tag, the second (with
        TIR) must gain the AF tag; both must gain the HC_SOM,
        CALLER_REPORTED and CALLER_PASSED tags while keeping their original
        format tags.
        """
        record1 = vcf.VcfRecord(
            chrom="chr1", pos="21", ref="A", alt="G",
            sample_tag_values={
                "sampleA": {"DP2": "45"},
                "sampleB": {"DP2": "67"},
            })
        record2 = vcf.VcfRecord(
            chrom="chr1", pos="22", ref="A", alt="G",
            sample_tag_values={
                "sampleA": {"TIR": "10,20", "DP2": "100"},
                "sampleB": {"TIR": "15,25", "DP2": "100"},
            })
        vcf_reader = MockVcfReader(records=[record1, record2])

        strelka_vcf_reader = strelka._StrelkaVcfReader(vcf_reader)
        vcf_records = list(strelka_vcf_reader.vcf_records())

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(2, len(vcf_records))

        self.assertIn("DP2", vcf_records[0].format_tags)
        for suffix in ("DP", "HC_SOM", "CALLER_REPORTED", "CALLER_PASSED"):
            self.assertIn(strelka.JQ_STRELKA_TAG + suffix,
                          vcf_records[0].format_tags)

        self.assertIn("TIR", vcf_records[1].format_tags)
        for suffix in ("AF", "HC_SOM", "CALLER_REPORTED", "CALLER_PASSED"):
            self.assertIn(strelka.JQ_STRELKA_TAG + suffix,
                          vcf_records[1].format_tags)
Esempio n. 9
0
    def test_vcf_records_newTagsPresent(self):
        """Records from the VarScan reader carry the added JQ_VARSCAN tags.

        The first record (DP only) must gain the DP tag, the second (FREQ
        only) must gain the AF tag; both must gain the HC_SOM,
        CALLER_REPORTED and CALLER_PASSED tags while keeping their original
        format tags.
        """
        record1 = vcf.VcfRecord(
            chrom="chr1", pos="21", ref="A", alt="G",
            sample_tag_values={
                "sampleA": {"DP": "45"},
                "sampleB": {"DP": "67"},
            })
        record2 = vcf.VcfRecord(
            chrom="chr1", pos="22", ref="A", alt="G",
            sample_tag_values={
                "sampleA": {"FREQ": "46%"},
                "sampleB": {"FREQ": "68%"},
            })
        vcf_reader = MockVcfReader(records=[record1, record2])

        varscan_vcf_reader = varscan._VarscanVcfReader(vcf_reader)
        vcf_records = list(varscan_vcf_reader.vcf_records())

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(2, len(vcf_records))

        self.assertIn("DP", vcf_records[0].format_tags)
        for suffix in ("DP", "HC_SOM", "CALLER_REPORTED", "CALLER_PASSED"):
            self.assertIn(varscan.JQ_VARSCAN_TAG + suffix,
                          vcf_records[0].format_tags)

        self.assertIn("FREQ", vcf_records[1].format_tags)
        for suffix in ("AF", "HC_SOM", "CALLER_REPORTED", "CALLER_PASSED"):
            self.assertIn(varscan.JQ_VARSCAN_TAG + suffix,
                          vcf_records[1].format_tags)
Esempio n. 10
0
    def test_vcf_records_newTagsPresent(self):
        """Records from the MuTect reader carry the added JQ_MUTECT tags.

        The first record (DP only) must gain the DP tag, the second (FA
        only) must gain the AF tag; both must gain the HC_SOM,
        CALLER_REPORTED and CALLER_PASSED tags while keeping their original
        format tags.
        """
        record1 = vcf.VcfRecord(
            chrom="chr1", pos="21", ref="A", alt="G",
            sample_tag_values={
                "sampleA": {"DP": "45"},
                "sampleB": {"DP": "67"},
            })
        record2 = vcf.VcfRecord(
            chrom="chr1", pos="22", ref="A", alt="G",
            sample_tag_values={
                "sampleA": {"FA": "0.54"},
                "sampleB": {"FA": "0.76"},
            })
        vcf_reader = MockVcfReader(metaheaders=['##FORMAT=<ID=FA,...>'],
                                   records=[record1, record2])

        mutect_vcf_reader = mutect._MutectVcfReader(vcf_reader)
        vcf_records = list(mutect_vcf_reader.vcf_records())

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(2, len(vcf_records))

        self.assertIn("DP", vcf_records[0].format_tags)
        for suffix in ("DP", "HC_SOM", "CALLER_REPORTED", "CALLER_PASSED"):
            self.assertIn(mutect.JQ_MUTECT_TAG + suffix,
                          vcf_records[0].format_tags)

        self.assertIn("FA", vcf_records[1].format_tags)
        for suffix in ("AF", "HC_SOM", "CALLER_REPORTED", "CALLER_PASSED"):
            self.assertIn(mutect.JQ_MUTECT_TAG + suffix,
                          vcf_records[1].format_tags)
Esempio n. 11
0
    def test_create_row_dict_fieldNamesMangledToAvoidCollision(self):
        """INFO keys that clash with fixed VCF columns get an INFO_ prefix.

        The INFO keys "REF" and "ALT" collide with the fixed REF/ALT columns
        and are expected as "INFO_REF"/"INFO_ALT"; the non-colliding "SNP"
        flag keeps its name.
        """
        # NOTE(review): FORMAT and per-sample columns are not part of this
        # fixture (they were commented out in the original), so only fixed
        # and INFO fields are checked here.
        column_list = [
            "CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"
        ]
        vcf_record = vcf.VcfRecord(
            "1",
            "42",
            "A",
            "AT",
            vcf_id="rs32",
            qual="30",
            vcf_filter="PASS",
            info="SNP;REF;ALT=Yep")

        actual_dict = expand._create_row_dict(column_list, vcf_record)

        expected_dict = {
            "CHROM": "1",
            "POS": "42",
            "ID": "rs32",
            "REF": "A",
            "ALT": "AT",
            "QUAL": "30",
            "FILTER": "PASS",
            "SNP": "SNP",
            "INFO_REF": "REF",
            "INFO_ALT": "Yep"
        }
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expected_dict, actual_dict)
Esempio n. 12
0
def _build_merged_record(coordinate, vcf_records, all_sample_names,
                         tags_to_keep):
    """Combine the sample tag values of several records into one VcfRecord.

    Args:
        coordinate: object supplying chrom, pos, ref, alt, vcf_id, qual,
            filter and info for the merged record.
        vcf_records: records whose sample_tag_values are merged; when the
            same sample/tag pair occurs more than once, the last record
            wins.
        all_sample_names: samples to emit, in output order.
        tags_to_keep: container of tag names; any tag not in it is dropped.

    Returns:
        A vcf.VcfRecord whose sample_tag_values holds, for every name in
        all_sample_names, all kept tags in sorted order, with "." wherever
        a sample has no value for a tag.
    """
    kept_tags = set()
    values_by_sample = {}

    # Pass 1: gather the retained tag values per sample (sparse).
    for vcf_record in vcf_records:
        for sample_name, tag_values in vcf_record.sample_tag_values.items():
            sample_values = values_by_sample.setdefault(sample_name, {})
            for tag_name, tag_value in tag_values.items():
                if tag_name not in tags_to_keep:
                    continue
                kept_tags.add(tag_name)
                sample_values[tag_name] = tag_value

    # Pass 2: densify, so every sample lists every kept tag in sorted order.
    ordered_tags = sorted(kept_tags)
    merged_values = OrderedDict()
    for sample_name in all_sample_names:
        sample_values = values_by_sample.get(sample_name, {})
        merged_values[sample_name] = OrderedDict(
            (tag_name, sample_values.get(tag_name, "."))
            for tag_name in ordered_tags)

    return vcf.VcfRecord(coordinate.chrom,
                         coordinate.pos,
                         coordinate.ref,
                         coordinate.alt,
                         coordinate.vcf_id,
                         coordinate.qual,
                         coordinate.filter,
                         coordinate.info,
                         sample_tag_values=merged_values)
Esempio n. 13
0
 def test_add_tag_value_validIndelAltEdgecaseNoFilter(self):
     """An ALT of "ACGTNacgtn,*." leaves the PASS filter untouched."""
     record = vcf.VcfRecord("chr1",
                            "42",
                            "A",
                            "ACGTNacgtn,*.",
                            vcf_filter="PASS")
     translate._ExcludeMissingAlt().add_tag_values(record)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual("PASS", record.filter)
Esempio n. 14
0
 def test_add_tag_value_validIndelRefEdgecaseNoFilter(self):
     """A REF of "ACGTNacgtn" is not flagged; the PASS filter is kept."""
     record = vcf.VcfRecord("chr1",
                            "42",
                            "ACGTNacgtn",
                            "C",
                            vcf_filter="PASS")
     translate._ExcludeMalformedRef().add_tag_values(record)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual("PASS", record.filter)
Esempio n. 15
0
    def test_add_summarize_tags(self):
        """_add_tags must leave the caller's add_tags_called flag set."""
        writer = MockFileWriter()
        record = vcf.VcfRecord("chr1", "42", "A", "C")
        reader = MockVcfReader(records=[record])
        summarize_caller = MockSummarizeCaller()

        summarize._add_tags(summarize_caller, reader, writer)

        self.assertTrue(summarize_caller.add_tags_called)
Esempio n. 16
0
 def test_add_tag_values_highConfidenceDoesNotChangeFilter(self):
     """A record listed in the high-confidence file keeps its filter.

     The record (chr1:42) appears as a row of the mock input file.
     """
     record = vcf.VcfRecord("chr1", "42", "ref", "alt", vcf_filter="pass")
     input_reader = MockFileReader("foo.txt", [
         "chrom\tposition\tref\tvar", "chr1\t42\tref\tvar",
         "chr2\t50\tref\tvar"
     ])
     expected = "pass"
     actual = _HCTag(input_reader).add_tag_values(record).filter
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, actual)
Esempio n. 17
0
 def test_add_tag_values_lowConfidencePassingReplacesFilter(self):
     """A passing record absent from the file gets the tag id as its filter.

     The record (chr1:30) has no matching row in the mock input file.
     """
     record = vcf.VcfRecord("chr1", "30", "ref", "alt", vcf_filter="pass")
     input_reader = MockFileReader("foo.txt", [
         "chrom\tposition\tref\tvar", "chr1\t42\tref\tvar",
         "chr2\t50\tref\tvar"
     ])
     tag = _HCTag(input_reader)
     expected = tag._TAG_ID
     actual = tag.add_tag_values(record).filter
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, actual)
Esempio n. 18
0
    def test_init_metaheaders(self):
        """_DepthZScoreTag exposes mean, stdev and FORMAT metaheaders, in order."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_DP_RANGE": "4"},
                "SB": {"JQ_SUMMARY_DP_RANGE": "7"},
            })
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_DP_RANGE": "13"},
                "SB": {"JQ_SUMMARY_DP_RANGE": "16"},
            })
        reader = MockVcfReader(records=[rec1, rec2])

        tag = zscore_caller._DepthZScoreTag(reader)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(3, len(tag.metaheaders))
        it = iter(tag.metaheaders)
        # NOTE(review): assertRegexpMatches is itself a deprecated alias of
        # assertRegex; kept because assertRegex does not exist on Python 2.7
        # and the supported versions are not visible here.
        self.assertRegexpMatches(
            next(it),
            '##jacquard.summarize.JQ_SUMMARY_DP_ZSCORE.JQ_SUMMARY_DP_RANGE_mean='
        )
        self.assertRegexpMatches(
            next(it),
            '##jacquard.summarize.JQ_SUMMARY_DP_ZSCORE.JQ_SUMMARY_DP_RANGE_stdev='
        )
        self.assertRegexpMatches(
            next(it),
            '##FORMAT=<ID=JQ_SUMMARY_DP_ZSCORE,Number=1,Type=Float,Description="Concordance of reported depth.*">'
        )
Esempio n. 19
0
    def test_init_metaheaders(self):
        """_ZScoreTag exposes mean, stdev and FORMAT metaheaders, in order.

        The mean/stdev headers must embed repr() of the computed statistics.
        """
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "13"}, "SB": {"X": "16"}})
        reader = MockVcfReader(records=[rec1, rec2])

        tag = zscore_caller._ZScoreTag(
            "ZScoreX",
            '##FORMAT=<ID=ZScoreX,Number=1,Type=Float,Description="ZScore for X">',
            "X", reader)

        # assertEqual replaces the deprecated assertEquals alias; arguments
        # are ordered (expected, actual) like the other tests in this file.
        self.assertEqual(3, len(tag.metaheaders))
        it = iter(tag.metaheaders)
        self.assertEqual(
            '##jacquard.summarize.ZScoreX.X_mean=' + repr(tag._mean),
            next(it))
        self.assertEqual(
            '##jacquard.summarize.ZScoreX.X_stdev=' + repr(tag._stdev),
            next(it))
        # NOTE(review): assertRegexpMatches is itself a deprecated alias of
        # assertRegex; kept because assertRegex does not exist on Python 2.7
        # and the supported versions are not visible here.
        self.assertRegexpMatches(
            next(it),
            '##FORMAT=<ID=ZScoreX,Number=1,Type=Float,Description="ZScore for X">'
        )
Esempio n. 20
0
    def test_init_createsAllMetaheaders(self):
        """ZScoreCaller exposes six metaheaders: three for AF, then three for DP."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_AF_RANGE": "4"},
                "SB": {"JQ_SUMMARY_AF_RANGE": "7"},
            })
        rec2 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"JQ_SUMMARY_AF_RANGE": "13"},
                "SB": {"JQ_SUMMARY_AF_RANGE": "16"},
            })
        reader = MockVcfReader(records=[rec1, rec2])

        caller = zscore_caller.ZScoreCaller(reader)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(6, len(caller.metaheaders))
        expected_patterns = [
            "JQ_SUMMARY_AF_ZSCORE.JQ_SUMMARY_AF_RANGE_mean",
            "JQ_SUMMARY_AF_ZSCORE.JQ_SUMMARY_AF_RANGE_stdev",
            "FORMAT=<ID=JQ_SUMMARY_AF_ZSCORE",
            "JQ_SUMMARY_DP_ZSCORE.JQ_SUMMARY_DP_RANGE_mean",
            "JQ_SUMMARY_DP_ZSCORE.JQ_SUMMARY_DP_RANGE_stdev",
            "FORMAT=<ID=JQ_SUMMARY_DP_ZSCORE",
        ]
        # The length check above guarantees zip pairs every header.
        # NOTE(review): assertRegexpMatches is itself a deprecated alias of
        # assertRegex; kept because assertRegex does not exist on Python 2.7
        # and the supported versions are not visible here.
        for metaheader, pattern in zip(caller.metaheaders, expected_patterns):
            self.assertRegexpMatches(metaheader, pattern)
Esempio n. 21
0
    def test_vcf_records_SomHcFileSNP(self):
        """Records listed in the somatic high-confidence file keep PASS."""
        record1 = vcf.VcfRecord(chrom="chr1",
                                pos="21",
                                ref="A",
                                alt="G",
                                vcf_filter="PASS")
        record2 = vcf.VcfRecord(chrom="chr1",
                                pos="22",
                                ref="A",
                                alt="T",
                                vcf_filter="PASS")
        vcf_reader = MockVcfReader(records=[record1, record2])

        # Both record positions appear in the high-confidence file content.
        content1 = ["chrom\tposition", "chr1\t21", "chr1\t22"]
        somatic_hc_reader = MockFileReader("fileA.Somatic.hc.fpfilter.pass",
                                           content1)

        varscan_vcf_reader = varscan._VarscanVcfReader(vcf_reader,
                                                       somatic_hc_reader)
        vcf_records = list(varscan_vcf_reader.vcf_records())

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(2, len(vcf_records))
        self.assertIn("PASS", vcf_records[0].filter)
        self.assertIn("PASS", vcf_records[1].filter)
Esempio n. 22
0
    def test_translate_files(self):
        """_translate_files writes headers and tagged records, then closes.

        Checks that the output starts with the reader's metaheaders, the new
        tags' metaheaders and the column header, that the record line holds
        both new tags with each sample's values, and that reader and writer
        were opened and closed.
        """
        record = vcf.VcfRecord("chr1",
                               "42",
                               "A",
                               "C",
                               sample_tag_values=OrderedDict(
                                   sorted({
                                       "SA": OrderedDict(),
                                       "SB": OrderedDict()
                                   }.items())))
        reader = MockVcfReader(metaheaders=["##metaheader1", "##metaheader2"],
                               records=[record],
                               sample_names=["SA", "SB"])
        writer = MockWriter()
        execution_context = []
        new_tags = [
            MockTag("TAG1",
                    OrderedDict(sorted({
                        "SA": 42,
                        "SB": 43
                    }.items())),
                    metaheader="##newTag1"),
            MockTag("TAG2",
                    OrderedDict(sorted({
                        "SA": 420,
                        "SB": 430
                    }.items())),
                    metaheader="##newTag2")
        ]

        translate._translate_files(reader, new_tags, execution_context, writer)

        self.assertTrue(reader.opened)
        self.assertTrue(writer.opened)
        expected = [
            '##metaheader1', '##metaheader2', '##newTag1', '##newTag2',
            '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNORMAL\tTUMOR'
        ]
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expected, writer._content[0:5])
        # NOTE(review): assertRegexpMatches is itself a deprecated alias of
        # assertRegex; kept because assertRegex does not exist on Python 2.7
        # and the supported versions are not visible here.
        self.assertRegexpMatches(writer._content[5], "TAG1:TAG2")
        self.assertRegexpMatches(writer._content[5], "42:420")
        self.assertRegexpMatches(writer._content[5], "43:430")

        self.assertTrue(reader.closed)
        self.assertTrue(writer.closed)
Esempio n. 23
0
    def test_create_row_dict(self):
        """_create_row_dict flattens fixed, INFO and per-sample fields.

        INFO flags map to themselves ("SNP"), INFO key=value pairs to their
        value, and sample tags to "TAG|SAMPLE" keys.
        """
        column_list = [
            "CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",
            "FORMAT", "SAMPLE_A|NORMAL", "SAMPLE_A|TUMOR"
        ]
        sample_tag_values = {
            "SAMPLE_A|NORMAL": {"DP": "50", "AF": "0.2"},
            "SAMPLE_A|TUMOR": {"DP": "87", "AF": "0.3"},
        }
        vcf_record = vcf.VcfRecord("1",
                                   "42",
                                   "A",
                                   "AT",
                                   vcf_id="rs32",
                                   qual="30",
                                   vcf_filter="PASS",
                                   info="SNP;SOMATIC=1",
                                   sample_tag_values=sample_tag_values)

        actual_dict = expand._create_row_dict(column_list, vcf_record)

        expected_dict = {
            "CHROM": "1",
            "POS": "42",
            "ID": "rs32",
            "REF": "A",
            "ALT": "AT",
            "QUAL": "30",
            "FILTER": "PASS",
            "SNP": "SNP",
            "SOMATIC": "1",
            "DP|SAMPLE_A|NORMAL": "50",
            "DP|SAMPLE_A|TUMOR": "87",
            "AF|SAMPLE_A|NORMAL": "0.2",
            "AF|SAMPLE_A|TUMOR": "0.3"
        }
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expected_dict, actual_dict)
Esempio n. 24
0
    def test_add_tags(self):
        """add_tags applies each tag in caller._tags to the record."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "7"}})
        reader = MockVcfReader(records=[rec1])

        caller = zscore_caller.ZScoreCaller(reader)
        # Swap in a mock tag so the outcome does not depend on the real ones.
        caller._tags = [MockTag("Y", {"SA": "A42", "SB": "B42"})]
        caller.add_tags(rec1)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual({"X": "4", "Y": "A42"}, rec1.sample_tag_values["SA"])
Esempio n. 25
0
    def test_init_setsPopulationStatisticsAssignsStddevCorrectlyWhenNoValues(
            self):
        """When every input is ".", both mean and stdev are None."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "."}, "SB": {"X": "."}})
        reader = MockVcfReader(records=[rec1])

        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        expected_mean = None
        expected_stdev = None
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expected_mean, tag._mean)
        self.assertEqual(expected_stdev, tag._stdev)
Esempio n. 26
0
    def test_init_setsPopulationStatisticsRoundsTo13digits(self):
        """Mean and stdev are stored rounded to 13 decimal digits."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={
                "SA": {"X": "1"},
                "SB": {"X": "1"},
                "SC": {"X": "0"},
            })
        reader = MockVcfReader(records=[rec1])

        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        # assertEqual replaces the deprecated assertEquals alias; the third
        # argument is the failure message.
        # NOTE(review): 2 / 3 relies on true division (Python 3 or
        # `from __future__ import division`) - confirm against file header.
        self.assertEqual(round(2 / 3, 13), tag._mean, repr(tag._mean))
        self.assertEqual(round(stdev([1, 1, 0]), 13), tag._stdev)
Esempio n. 27
0
    def test_init_setsPopulationStatisticsAssignsStddevCorrectlyWhenOneValue(
            self):
        """With a single non-null input, statistics come from that value alone."""
        rec1 = vcf.VcfRecord(
            "1", "42", "A", "C",
            sample_tag_values={"SA": {"X": "4"}, "SB": {"X": "."}})
        reader = MockVcfReader(records=[rec1])

        tag = zscore_caller._ZScoreTag("ZScoreX", "ZScore for X", "X", reader)

        values = [4]
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias;
        # the third positional argument is the number of decimal places.
        self.assertAlmostEqual(mean(values), tag._mean,
                               _ZScoreTag._MAX_PRECISION)
        self.assertAlmostEqual(stdev(values), tag._stdev,
                               _ZScoreTag._MAX_PRECISION)
Esempio n. 28
0
 def test_add_tag_value_missingAltBothReplacesFilter(self):
     """An ALT of ".*" is flagged malformed, replacing the PASS filter."""
     record = vcf.VcfRecord("chr1", "42", "A", ".*", vcf_filter="PASS")
     translate._ExcludeMalformedAlt().add_tag_values(record)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual("JQ_EXCLUDE_MALFORMED_ALT", record.filter)
Esempio n. 29
0
 def test_add_tag_value_missingAltNullReplacesFilter(self):
     """An ALT of "." is flagged missing, replacing the PASS filter."""
     record = vcf.VcfRecord("chr1", "42", "A", ".", vcf_filter="PASS")
     translate._ExcludeMissingAlt().add_tag_values(record)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual("JQ_EXCLUDE_MISSING_ALT", record.filter)
Esempio n. 30
0
 def test_add_tag_value_missingAltBothNoFilter(self):
     """An ALT of ".*" is not treated as missing; the PASS filter is kept."""
     record = vcf.VcfRecord("chr1", "42", "A", ".*", vcf_filter="PASS")
     translate._ExcludeMissingAlt().add_tag_values(record)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual("PASS", record.filter)