Example #1
0
 def test_text(self):
     """text() emits the fixed columns, the FORMAT column and one column per sample."""
     sampleA = OrderedDict(sorted({"F1": "SA.1", "F2": "SA.2", "F3": "SA.3"}.items()))
     sampleB = OrderedDict(sorted({"F1": "SB.1", "F2": "SB.2", "F3": "SB.3"}.items()))
     sample_tag_values = OrderedDict(sorted({"SampleA": sampleA, "SampleB": sampleB}.items()))
     record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
     expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected, record.text())
Example #2
0
 def test_asTextWhenEmptyFormatField(self):
     """text() writes '.' for FORMAT and for each sample when no sample has tags."""
     sampleA = OrderedDict({})
     sampleB = OrderedDict({})
     sample_tag_values = OrderedDict({"SampleA": sampleA, "SampleB": sampleB})
     record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
     expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|.|.|.\n")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected, record.text())
Example #3
0
    def test_empty_record(self):
        """get_empty_record() renders like a record built from chrom/pos/ref/alt alone."""
        sample_names = ["SampleA"]
        base = VcfRecord.parse_record(self.entab("chr2|1|ID|A|C|QUAL|FILTER|INFO|F|S\n"), sample_names)

        empty_record = base.get_empty_record()

        expected_record = VcfRecord(chrom="chr2", pos="1", ref="A", alt="C")
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected_record.text(), empty_record.text())
Example #4
0
 def testHash(self):
     """Records with the same chrom/pos/ref/alt hash alike and collapse to one set member."""
     sample_names = ["sampleA"]
     base = VcfRecord.parse_record(self.entab("A|B|ID|C|D|QUAL|FILTER|INFO|F|S\n"), sample_names)
     # Same leading columns as `base`; only the INFO and FORMAT columns differ.
     base_equivalent = VcfRecord.parse_record(self.entab("A|B|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(base.__hash__(), base_equivalent.__hash__())
     record_set = set()
     record_set.add(base)
     record_set.add(base_equivalent)
     self.assertEqual(1, len(record_set))
Example #5
0
    def test_empty_record(self):
        """get_empty_record() renders like a record built from chrom/pos/ref/alt alone."""
        sample_names = ["SampleA"]
        base = VcfRecord.parse_record(
            self.entab("chr2|1|ID|A|C|QUAL|FILTER|INFO|F|S\n"), sample_names)

        empty_record = base.get_empty_record()

        expected_record = VcfRecord(chrom="chr2", pos="1", ref="A", alt="C")
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected_record.text(), empty_record.text())
 def test_passed_tag_format(self):
     """PassedTag('foo').add_tag_values leaves the record equal to the expected record."""
     passed_tag = common_tags.PassedTag("foo")
     actual_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2|SA.1:SA.2|SB.1:SB.2\n")
     actual_vcf_record = VcfRecord.parse_record(actual_line, ["SA", "SB"])
     expected_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:foo_CALLER_REPORTED"
                                "|SA.1:SA.2:1|SB.1:SB.2\n")
     expected_vcf_record = VcfRecord.parse_record(expected_line, ["SA", "SB"])
     passed_tag.add_tag_values(actual_vcf_record)
     # NOTE(review): this compares the records themselves, not their text();
     # if VcfRecord equality only looks at chrom/pos/ref/alt, the tag values
     # added above are never actually checked -- confirm and tighten if so.
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected_vcf_record, actual_vcf_record)
Example #7
0
 def test_asTextExpandsEmptyTrailingFormatField(self):
     """text() pads a sample that lacks a trailing tag with '.' for that tag."""
     sampleA = OrderedDict([('a', '1'), ('b', '2')])
     # SampleB has no value for tag 'b'.
     sampleB = OrderedDict([('a', '10')])
     sample_tag_values = OrderedDict([("SampleA", sampleA),
                                      ("SampleB", sampleB)])
     record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL",
                        "FILTER", "INFO", sample_tag_values)
     expected = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|a:b|1:2|10:.\n")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected, record.text())
Example #8
0
    def testCompare_nonNumericChrom(self):
        """sorted() restores the expected order chr2, chr5, 10 from the reversed list."""
        sample_names = ["SampleA"]
        expected_records = [
            VcfRecord.parse_record(self.entab("chr2|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("chr5|1|ID|A|A|QUAL|FILTER||foo|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("10|1|ID|A|C|QUAL|FILTER|INFO|F|S\n"), sample_names),
        ]

        input_records = expected_records[::-1]

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected_records, sorted(input_records))
Example #9
0
    def test_join_info_fields_nullValues(self):
        """_join_info_fields keeps '.' for a null INFO and drops it once a field is added."""
        sample_names = ["SampleA"]
        input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|.|F|S\n")
        vcf_record = VcfRecord.parse_record(input_line, sample_names)
        vcf_record._join_info_fields()
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(".", vcf_record.info)

        # Re-parse so the second scenario starts from an untouched record.
        vcf_record = VcfRecord.parse_record(input_line, sample_names)
        vcf_record.add_info_field("foo")
        vcf_record._join_info_fields()
        self.assertEqual("foo", vcf_record.info)
Example #10
0
 def test_asTextWhenEmptyFormatField(self):
     """text() writes '.' for FORMAT and for each sample when no sample has tags."""
     sampleA = OrderedDict({})
     sampleB = OrderedDict({})
     sample_tag_values = OrderedDict({
         "SampleA": sampleA,
         "SampleB": sampleB
     })
     record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL",
                        "FILTER", "INFO", sample_tag_values)
     expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|.|.|.\n")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected, record.text())
Example #11
0
 def testHash(self):
     """Records with the same chrom/pos/ref/alt hash alike and collapse to one set member."""
     sample_names = ["sampleA"]
     base = VcfRecord.parse_record(
         self.entab("A|B|ID|C|D|QUAL|FILTER|INFO|F|S\n"), sample_names)
     # Same leading columns as `base`; only the INFO and FORMAT columns differ.
     base_equivalent = VcfRecord.parse_record(
         self.entab("A|B|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(base.__hash__(), base_equivalent.__hash__())
     record_set = set()
     record_set.add(base)
     record_set.add(base_equivalent)
     self.assertEqual(1, len(record_set))
Example #12
0
    def test_join_info_fields_nullValues(self):
        """_join_info_fields keeps '.' for a null INFO and drops it once a field is added."""
        sample_names = ["SampleA"]
        input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|.|F|S\n")
        vcf_record = VcfRecord.parse_record(input_line, sample_names)
        vcf_record._join_info_fields()
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(".", vcf_record.info)

        # Re-parse so the second scenario starts from an untouched record.
        vcf_record = VcfRecord.parse_record(input_line, sample_names)
        vcf_record.add_info_field("foo")
        vcf_record._join_info_fields()
        self.assertEqual("foo", vcf_record.info)
Example #13
0
 def test_passed_tag_format_noPass(self):
     """PassedTag('foo') appends JQ_foo_CALLER_PASSED with value 1 for both samples."""
     passed_tag = common_tags.PassedTag("foo")
     actual_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|PASS|INFO|F1:F2|SA.1:SA.2|SB.1:SB.2\n")
     actual_vcf_record = VcfRecord.parse_record(actual_line, ["SA", "SB"])
     expected_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|PASS|INFO|F1:F2:JQ_foo_CALLER_PASSED"
         "|SA.1:SA.2:1|SB.1:SB.2:1\n")
     expected_vcf_record = VcfRecord.parse_record(expected_line,
                                                  ["SA", "SB"])
     passed_tag.add_tag_values(actual_vcf_record)
     # Compare rendered text so the added tag and its values are checked.
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected_vcf_record.text(), actual_vcf_record.text())
Example #14
0
    def test_parse_record_removesNewlines(self):
        """parse_record strips a trailing \\n, \\r or \\r\\n from the last sample value."""
        sample_names = ["SampleA"]
        # Same line, checked in the original order with each line ending.
        for line_ending in ("\n", "\r", "\r\n"):
            input_line = self.entab(
                "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SB_foo:SB_bar" + line_ending)
            record = VcfRecord.parse_record(input_line, sample_names)
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual("SB_bar", record.sample_tag_values["SampleA"]["BAR"])
Example #15
0
    def test_parse_record_removesNewlines(self):
        """parse_record strips a trailing \\n, \\r or \\r\\n from the last sample value."""
        sample_names = ["SampleA"]
        # Same line, checked in the original order with each line ending.
        for line_ending in ("\n", "\r", "\r\n"):
            input_line = self.entab(
                "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SB_foo:SB_bar"
                + line_ending)
            record = VcfRecord.parse_record(input_line, sample_names)
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual("SB_bar",
                             record.sample_tag_values["SampleA"]["BAR"])
Example #16
0
 def test_format_tags(self):
     """format_tags is the set of tag names parsed from the FORMAT column."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
     )
     record = VcfRecord.parse_record(input_line, sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(set(["F1", "F2", "F3"]), record.format_tags)
Example #17
0
 def test_format_field_preservesOrderWhenAddingNewTags(self):
     """_format_field() keeps the parsed tag order and appends new tags in insertion order."""
     sample_names = ["SA", "SB"]
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
     record = VcfRecord.parse_record(input_line, sample_names)
     # Z4 is added before A5, so an alphabetical sort would show up as a failure.
     record.add_sample_tag_value("Z4", {"SA": "SA.4", "SB": "SB.4"})
     record.add_sample_tag_value("A5", {"SA": "SA.A5", "SB": "SB.A5"})
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("F3:F1:F2:Z4:A5", record._format_field())
Example #18
0
    def testCompare_nonNumericChrom(self):
        """sorted() restores the expected order chr2, chr5, 10 from the reversed list."""
        sample_names = ["SampleA"]
        expected_records = [
            VcfRecord.parse_record(
                self.entab("chr2|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"),
                sample_names),
            VcfRecord.parse_record(
                self.entab("chr5|1|ID|A|A|QUAL|FILTER||foo|S\n"),
                sample_names),
            VcfRecord.parse_record(
                self.entab("10|1|ID|A|C|QUAL|FILTER|INFO|F|S\n"), sample_names)
        ]

        input_records = expected_records[::-1]

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected_records, sorted(input_records))
Example #19
0
 def test_parse_record_initsSampleTagValues(self):
     """parse_record fills sample_tag_values with a tag->value mapping per sample name."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
     record = VcfRecord.parse_record(input_line, sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["SampleA", "SampleB"], sorted(record.sample_tag_values.keys()))
     self.assertEqual({"F1": "SA.1", "F2": "SA.2", "F3": "SA.3"}, record.sample_tag_values["SampleA"])
     self.assertEqual({"F1": "SB.1", "F2": "SB.2", "F3": "SB.3"}, record.sample_tag_values["SampleB"])
Example #20
0
    def test_add_tag_values_raisesNotImplementedError(self):
        """A subclass that does not override add_tag_values raises NotImplementedError."""
        class FakeTag(common_tags.AbstractJacquardTag):
            # Override __init__ with a no-op so the base initializer is not run.
            def __init__(self):
                pass

        tag = FakeTag()
        add_tag_values = tag.add_tag_values
        record = VcfRecord("1", "42", "A", "C")
        self.assertRaises(NotImplementedError, add_tag_values, record)
Example #21
0
 def test_format_field(self):
     """_format_tag_fields() yields the tag names in the order given by the FORMAT column."""
     sample_names = ["SA", "SB"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
     )
     record = VcfRecord.parse_record(input_line, sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["F3", "F1", "F2"],
                      list(record._format_tag_fields()))
Example #22
0
 def test_sample_tag_values_emptyDictWhenNoSampleData(self):
     """Empty FORMAT and sample columns give each sample an empty tag mapping."""
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|||\n")
     record = VcfRecord.parse_record(input_line,
                                     sample_names=["sampleA", "sampleB"])
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["sampleA", "sampleB"],
                      sorted(record.sample_tag_values.keys()))
     self.assertEqual({}, record.sample_tag_values["sampleA"])
     self.assertEqual({}, record.sample_tag_values["sampleB"])
    def test_add_tag_values_nullValues(self):
        """_CallersPassedListTag appends 'MT' when MuTect is 1 and VarScan is '.'."""
        line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}|X:1:.|Y:1:.\n".format(mutect.JQ_MUTECT_TAG, common_tags.CALLER_PASSED, varscan.JQ_VARSCAN_TAG, common_tags.CALLER_PASSED))
        processedVcfRecord = VcfRecord.parse_record(line, ["SA", "SB"])
        tag = summarize_caller._CallersPassedListTag()
        tag.add_tag_values(processedVcfRecord)

        expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}:{}CALLERS_PASSED_LIST|X:1:.:MT|Y:1:.:MT\n".format(mutect.JQ_MUTECT_TAG, common_tags.CALLER_PASSED, varscan.JQ_VARSCAN_TAG, common_tags.CALLER_PASSED, summarize_caller.JQ_SUMMARY_TAG))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected, processedVcfRecord.text())
    def test_add_tag_values(self):
        """_CallersReportedTag appends 2 for each sample when both caller tags are 1."""
        line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}|X:1:1|Y:1:1\n".format(mutect.JQ_MUTECT_TAG, common_tags.CALLER_REPORTED, varscan.JQ_VARSCAN_TAG, common_tags.CALLER_REPORTED))
        processedVcfRecord = VcfRecord.parse_record(line, ["SA", "SB"])
        tag = summarize_caller._CallersReportedTag()
        tag.add_tag_values(processedVcfRecord)

        expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}:{}{}|X:1:1:2|Y:1:1:2\n".format(mutect.JQ_MUTECT_TAG, common_tags.CALLER_REPORTED, varscan.JQ_VARSCAN_TAG, common_tags.CALLER_REPORTED, summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_REPORTED))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected, processedVcfRecord.text())
    def test_add_tag_values_nullValues(self):
        """_SamplesPassed adds an INFO count of 0 when every sample's passed tag is '.'."""
        line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}|X:.|Y:.\n".format(summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_PASSED))
        processedVcfRecord = VcfRecord.parse_record(line, ["SA", "SB"])
        tag = summarize_caller._SamplesPassed()
        tag.add_tag_values(processedVcfRecord)

        # The sample columns are unchanged; only the INFO column gains "<tag>=0".
        expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO;{}{}=0|JQ_DP:{}{}|X:.|Y:.\n".format(summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_SAMPLES_PASSED, summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_PASSED))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected, processedVcfRecord.text())
Example #26
0
    def testCompare_orderingByNumericChromAndPos(self):
        """sorted() restores the expected order 1, 2, 10, 11, 20, M, X from the reversed list."""
        sample_names = ["SampleA"]
        expected_records = [
            VcfRecord.parse_record(
                self.entab("1|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(
                self.entab("2|1|ID|A|A|QUAL|FILTER||foo|S\n"), sample_names),
            VcfRecord.parse_record(
                self.entab("10|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"),
                sample_names),
            VcfRecord.parse_record(
                self.entab("11|1|ID|C|A|QUAL|FILTER|INFO|F|S\n"),
                sample_names),
            VcfRecord.parse_record(
                self.entab("20|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"),
                sample_names),
            VcfRecord.parse_record(
                self.entab("M|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(
                self.entab("X|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names)
        ]

        input_records = expected_records[::-1]

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected_records, sorted(input_records))
Example #27
0
 def test_insert_format_field_failsOnInvalidSampleDict(self):
     """add_sample_tag_value raises KeyError when the sample keys do not match the record's samples."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
     record = VcfRecord.parse_record(input_line, sample_names)
     # A missing sample, an unknown sample in place of a known one, and an extra sample.
     mismatched_sample_values = (
         {"SampleA": 0.6},
         {"SampleA": 0.6, "SampleZ": 0.6},
         {"SampleA": 0.6, "SampleB": 0.6, "SampleZ": 0.6},
     )
     for sample_values in mismatched_sample_values:
         self.assertRaises(KeyError, record.add_sample_tag_value, "inserted", sample_values)
Example #28
0
 def test_add_sample_format_value(self):
     """add_sample_tag_value appends the new tag to FORMAT and its value to each sample."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
     record = VcfRecord.parse_record(input_line, sample_names)
     record.add_sample_tag_value("inserted", {"SampleB": "insertedValueB", "SampleA": "insertedValueA"})
     expected = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3:inserted|SA.1:SA.2:SA.3:insertedValueA|SB.1:SB.2:SB.3:insertedValueB\n"
     )
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected, record.text())
    def test_add_tag_values_nonePassed(self):
        """_SamplesPassed records '0' in info_dict when every sample's passed tag is 0."""
        line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}|X:0|Y:0\n".format(summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_PASSED))
        vcf_record = VcfRecord.parse_record(line, ["SA", "SB"])
        tag = summarize_caller._SamplesPassed()
        tag.add_tag_values(vcf_record)

        info_tag = summarize_caller.JQ_SUMMARY_TAG + summarize_caller.JQ_SAMPLES_PASSED
        self.assertIn(info_tag, vcf_record.info_dict)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual("0", vcf_record.info_dict[info_tag])
Example #30
0
 def test_format_field_preservesOrderWhenAddingNewTags(self):
     """_format_tag_fields() keeps the parsed tag order and appends new tags in insertion order."""
     sample_names = ["SA", "SB"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
     )
     record = VcfRecord.parse_record(input_line, sample_names)
     # Z4 is added before A5, so an alphabetical sort would show up as a failure.
     record.add_sample_tag_value("Z4", {"SA": "SA.4", "SB": "SB.4"})
     record.add_sample_tag_value("A5", {"SA": "SA.A5", "SB": "SB.A5"})
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["F3", "F1", "F2", "Z4", "A5"],
                      list(record._format_tag_fields()))
Example #31
0
 def test_insert_format_field_failsOnExistingField(self):
     """add_sample_tag_value raises KeyError when the tag name is already in the record."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
     )
     record = VcfRecord.parse_record(input_line, sample_names)
     add_value = record.add_sample_tag_value
     # "F1" is one of the tags already parsed from the FORMAT column.
     duplicate_values = {"SampleA": 0.6, "SampleB": 0.6}
     self.assertRaises(KeyError, add_value, "F1", duplicate_values)
Example #32
0
 def test_sample_tag_values_whenSparseSampleData(self):
     """A named FORMAT tag with '.' sample columns maps that tag to '.' for each sample."""
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO|.|.\n")
     record = VcfRecord.parse_record(input_line,
                                     sample_names=["sampleA", "sampleB"])
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["sampleA", "sampleB"],
                      sorted(record.sample_tag_values.keys()))
     self.assertEqual(OrderedDict({"FOO": "."}),
                      record.sample_tag_values["sampleA"])
     self.assertEqual(OrderedDict({"FOO": "."}),
                      record.sample_tag_values["sampleB"])
Example #33
0
 def test_add_info_field_nonAssignedField(self):
     """An INFO field without '=' is stored in info_dict under its own name, parsed or added."""
     sample_names = ["SampleA"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|k1=v1;k2=v2;baz|F|S\n")
     vcf_record = VcfRecord.parse_record(input_line, sample_names)
     vcf_record.add_info_field("foo")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual({
         "k1": "v1",
         "k2": "v2",
         "baz": "baz",
         "foo": "foo"
     }, vcf_record.info_dict)
Example #34
0
 def test_sample_tag_values(self):
     """_sample_tag_values pairs each sample's ':'-separated values with the FORMAT tag names."""
     sample_tag_values = VcfRecord._sample_tag_values(
         ["sampleA", "sampleB"], "foo:bar",
         ["SA_foo:SA_bar", "SB_foo:SB_bar"])
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual({
         "foo": "SA_foo",
         "bar": "SA_bar"
     }, sample_tag_values["sampleA"])
     self.assertEqual({
         "foo": "SB_foo",
         "bar": "SB_bar"
     }, sample_tag_values["sampleB"])
Example #35
0
 def test_parse_record(self):
     """parse_record maps the first eight columns onto the record's attributes."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SA_foo:SA_bar|SB_foo:SB_bar\n")
     record = VcfRecord.parse_record(input_line, sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("CHROM", record.chrom)
     self.assertEqual("POS", record.pos)
     self.assertEqual("ID", record.vcf_id)
     self.assertEqual("REF", record.ref)
     self.assertEqual("ALT", record.alt)
     self.assertEqual("QUAL", record.qual)
     self.assertEqual("FILTER", record.filter)
     self.assertEqual("INFO", record.info)
Example #36
0
    def test_sample_field_whenInconsistentTags(self):
        """_sample_field raises ValueError when asked for fewer tags than the sample holds."""
        # FYI this should never happen in the wild, but I wanted to test the exception formatting.
        sampleA = OrderedDict([('a', '1'), ('b', '2')])
        sampleB = OrderedDict([('a', '10')])
        sample_tag_values = OrderedDict([("SampleA", sampleA),
                                         ("SampleB", sampleB)])
        record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL",
                           "FILTER", "INFO", sample_tag_values)

        # assertRaisesRegexp was removed in Python 3.12; prefer the current
        # name and fall back to the old one where only that exists.
        assert_raises_regex = getattr(self, "assertRaisesRegex", None)
        if assert_raises_regex is None:
            assert_raises_regex = self.assertRaisesRegexp

        # NOTE(review): the unescaped '|' makes this pattern an alternation, so
        # a message matching either half satisfies it -- confirm that is intended.
        assert_raises_regex(
            ValueError,
            r'CHROM:POS:REF:ALT|sample format tags are not consistent: requested tags \[a\] but sample has has tags \[a=1, b=2\] leaving behind \[b\]',
            record._sample_field, ['a'], 'SampleA')
Example #37
0
 def test_add_sample_format_value(self):
     """add_sample_tag_value appends the new tag to FORMAT and its value to each sample."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
     )
     record = VcfRecord.parse_record(input_line, sample_names)
     record.add_sample_tag_value("inserted", {
         "SampleB": "insertedValueB",
         "SampleA": "insertedValueA"
     })
     expected = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3:inserted|SA.1:SA.2:SA.3:insertedValueA|SB.1:SB.2:SB.3:insertedValueB\n"
     )
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected, record.text())
Example #38
0
 def test_parse_record(self):
     """parse_record maps the first eight columns onto the record's attributes."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SA_foo:SA_bar|SB_foo:SB_bar\n"
     )
     record = VcfRecord.parse_record(input_line, sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("CHROM", record.chrom)
     self.assertEqual("POS", record.pos)
     self.assertEqual("ID", record.vcf_id)
     self.assertEqual("REF", record.ref)
     self.assertEqual("ALT", record.alt)
     self.assertEqual("QUAL", record.qual)
     self.assertEqual("FILTER", record.filter)
     self.assertEqual("INFO", record.info)
Example #39
0
 def test_text(self):
     """text() emits the fixed columns, the FORMAT column and one column per sample."""
     sampleA = OrderedDict(
         sorted({
             "F1": "SA.1",
             "F2": "SA.2",
             "F3": "SA.3"
         }.items()))
     sampleB = OrderedDict(
         sorted({
             "F1": "SB.1",
             "F2": "SB.2",
             "F3": "SB.3"
         }.items()))
     sample_tag_values = OrderedDict(
         sorted({
             "SampleA": sampleA,
             "SampleB": sampleB
         }.items()))
     record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL",
                        "FILTER", "INFO", sample_tag_values)
     expected = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
     )
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(expected, record.text())
Example #40
0
    def testCompare_orderingByNumericChromAndPos(self):
        """sorted() restores the expected order 1, 2, 10, 11, 20, M, X from the reversed list."""
        sample_names = ["SampleA"]
        expected_records = [
            VcfRecord.parse_record(self.entab("1|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("2|1|ID|A|A|QUAL|FILTER||foo|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("10|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("11|1|ID|C|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("20|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("M|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
            VcfRecord.parse_record(self.entab("X|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
        ]

        input_records = expected_records[::-1]

        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(expected_records, sorted(input_records))
Example #41
0
 def test_parse_record_initsSampleTagValues(self):
     """parse_record fills sample_tag_values with a tag->value mapping per sample name."""
     sample_names = ["SampleA", "SampleB"]
     input_line = self.entab(
         "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
     )
     record = VcfRecord.parse_record(input_line, sample_names)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["SampleA", "SampleB"],
                      sorted(record.sample_tag_values.keys()))
     self.assertEqual({
         "F1": "SA.1",
         "F2": "SA.2",
         "F3": "SA.3"
     }, record.sample_tag_values["SampleA"])
     self.assertEqual({
         "F1": "SB.1",
         "F2": "SB.2",
         "F3": "SB.3"
     }, record.sample_tag_values["SampleB"])
Example #42
0
 def test_equals(self):
     """Records are equal when chrom, pos, ref and alt match; changing any one breaks equality."""
     sample_names = ["sampleA"]
     base = VcfRecord.parse_record(self.entab("A|1|ID|C|D|QUAL|FILTER|INFO|F|S\n"), sample_names)
     # Differs from `base` only in the INFO and FORMAT columns.
     base_equivalent = VcfRecord.parse_record(self.entab("A|1|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     # assertEqual/assertNotEqual: the assertEquals/assertNotEquals aliases
     # were removed in Python 3.12.
     self.assertEqual(base, base_equivalent)
     different_chrom = VcfRecord.parse_record(self.entab("Z|1|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_chrom)
     different_pos = VcfRecord.parse_record(self.entab("A|2|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_pos)
     different_ref = VcfRecord.parse_record(self.entab("A|1|ID|Z|D|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_ref)
     different_alt = VcfRecord.parse_record(self.entab("A|1|ID|C|Z|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_alt)
Example #43
0
 def test_equals(self):
     """Records are equal when chrom, pos, ref and alt match; changing any one breaks equality."""
     sample_names = ["sampleA"]
     base = VcfRecord.parse_record(
         self.entab("A|1|ID|C|D|QUAL|FILTER|INFO|F|S\n"), sample_names)
     # Differs from `base` only in the INFO and FORMAT columns.
     base_equivalent = VcfRecord.parse_record(
         self.entab("A|1|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     # assertEqual/assertNotEqual: the assertEquals/assertNotEquals aliases
     # were removed in Python 3.12.
     self.assertEqual(base, base_equivalent)
     different_chrom = VcfRecord.parse_record(
         self.entab("Z|1|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_chrom)
     different_pos = VcfRecord.parse_record(
         self.entab("A|2|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_pos)
     different_ref = VcfRecord.parse_record(
         self.entab("A|1|ID|Z|D|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_ref)
     different_alt = VcfRecord.parse_record(
         self.entab("A|1|ID|C|Z|QUAL|FILTER||foo|S\n"), sample_names)
     self.assertNotEqual(base, different_alt)
Example #44
0
 def test_add_info_field_nonAssignedField(self):
     """An INFO field without '=' is stored in info_dict under its own name, parsed or added."""
     sample_names = ["SampleA"]
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|k1=v1;k2=v2;baz|F|S\n")
     vcf_record = VcfRecord.parse_record(input_line, sample_names)
     vcf_record.add_info_field("foo")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual({"k1": "v1", "k2": "v2", "baz": "baz", "foo": "foo"}, vcf_record.info_dict)
Example #45
0
 def test_sample_tag_values_emptyDictWhenExplicitNullSampleData(self):
     """A '.' FORMAT column with '.' sample columns gives each sample an empty tag mapping."""
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|.|.|.\n")
     record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["sampleA", "sampleB"], sorted(record.sample_tag_values.keys()))
     self.assertEqual({}, record.sample_tag_values["sampleA"])
     self.assertEqual({}, record.sample_tag_values["sampleB"])
Example #46
0
 def test_join_info_fields_orderedCorrectly(self):
     """_join_info_fields leaves the INFO fields in their original order."""
     vcf_record = VcfRecord("chr1", "2", "A", "G", info="FOO;BAR;BAZ")
     vcf_record._join_info_fields()
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("FOO;BAR;BAZ", vcf_record.info)
Example #47
0
 def test_add_or_replace_filter_duplicateFilterNotAdded(self):
     """Adding a filter the record already carries leaves the filter value unchanged."""
     record = VcfRecord("chr1", "42", "XYZ", "C", vcf_filter="JQ_EXCLUDE")
     record.add_or_replace_filter("JQ_EXCLUDE")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("JQ_EXCLUDE", record.filter)
Example #48
0
 def test_format_field_emptyWhenNoSamples(self):
     """_format_field() returns '.' for a line with no FORMAT or sample columns."""
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO\n")
     record = VcfRecord.parse_record(input_line, [])
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(".", record._format_field())
Example #49
0
 def test_join_info_fields_orderedCorrectly(self):
     """_join_info_fields leaves the INFO fields in their original order."""
     vcf_record = VcfRecord("chr1", "2", "A", "G", info="FOO;BAR;BAZ")
     vcf_record._join_info_fields()
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("FOO;BAR;BAZ", vcf_record.info)
Example #50
0
 def test_sample_tag_values_preservesSampleOrder(self):
     """Both sample names given to parse_record appear as keys of sample_tag_values."""
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|||\n")
     record = VcfRecord.parse_record(input_line,
                                     sample_names=["sampleB", "sampleA"])
     # NOTE(review): sorted() hides the key order, so this checks membership
     # only, not the "preserves order" the test name suggests -- confirm
     # whether the unsorted keys should be compared to ["sampleB", "sampleA"].
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["sampleA", "sampleB"],
                      sorted(record.sample_tag_values.keys()))
Example #51
0
 def test_sample_tag_values(self):
     """_sample_tag_values pairs each sample's ':'-separated values with the FORMAT tag names."""
     sample_tag_values = VcfRecord._sample_tag_values(
         ["sampleA", "sampleB"], "foo:bar", ["SA_foo:SA_bar", "SB_foo:SB_bar"]
     )
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual({"foo": "SA_foo", "bar": "SA_bar"}, sample_tag_values["sampleA"])
     self.assertEqual({"foo": "SB_foo", "bar": "SB_bar"}, sample_tag_values["sampleB"])
Example #52
0
 def test_sample_tag_values_preservesSampleOrder(self):
     """Both sample names given to parse_record appear as keys of sample_tag_values."""
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|||\n")
     record = VcfRecord.parse_record(input_line, sample_names=["sampleB", "sampleA"])
     # NOTE(review): sorted() hides the key order, so this checks membership
     # only, not the "preserves order" the test name suggests -- confirm
     # whether the unsorted keys should be compared to ["sampleB", "sampleA"].
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["sampleA", "sampleB"], sorted(record.sample_tag_values.keys()))
Example #53
0
 def test_sample_tag_values_whenSparseSampleData(self):
     """A named FORMAT tag with '.' sample columns maps that tag to '.' for each sample."""
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO|.|.\n")
     record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(["sampleA", "sampleB"], sorted(record.sample_tag_values.keys()))
     self.assertEqual(OrderedDict({"FOO": "."}), record.sample_tag_values["sampleA"])
     self.assertEqual(OrderedDict({"FOO": "."}), record.sample_tag_values["sampleB"])
Example #54
0
 def test_add_or_replace_filter_duplicateFilterNotAdded(self):
     """Adding a filter the record already carries leaves the filter value unchanged."""
     record = VcfRecord("chr1", "42", "XYZ", "C", vcf_filter="JQ_EXCLUDE")
     record.add_or_replace_filter("JQ_EXCLUDE")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("JQ_EXCLUDE", record.filter)
Example #55
0
 def test_sample_tag_values_emptyDictWhenNoSamples(self):
     """sample_tag_values is empty when the line has no sample columns, even if names are given."""
     input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO\n")
     record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual({}, record.sample_tag_values)
Example #56
0
 def test_add_or_replace_filter_filterReplacesEmptyFilter(self):
     """An empty filter value is replaced by the new filter rather than appended to."""
     record = VcfRecord("chr1", "42", "X", "C", vcf_filter="")
     record.add_or_replace_filter("JQ_EXCLUDE")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("JQ_EXCLUDE", record.filter)
Example #57
0
 def test_add_or_replace_filter_filterReplacesEmptyFilter(self):
     """An empty filter value is replaced by the new filter rather than appended to."""
     record = VcfRecord("chr1", "42", "X", "C", vcf_filter="")
     record.add_or_replace_filter("JQ_EXCLUDE")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("JQ_EXCLUDE", record.filter)
Example #58
0
 def test_get_info_dict_null(self):
     """An INFO value of '.' yields an empty info_dict."""
     vcf_record = VcfRecord("chr1", "42", "A", "C", info=".")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual({}, vcf_record.info_dict)
Example #59
0
 def test_add_or_replace_filter_filtersOnlyAppendsUnique(self):
     """Adding the same new filter twice appends it to the existing filter only once."""
     record = VcfRecord("chr1", "42", "XYZ", "C", vcf_filter="indelError")
     record.add_or_replace_filter("JQ_EXCLUDE")
     record.add_or_replace_filter("JQ_EXCLUDE")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("indelError;JQ_EXCLUDE", record.filter)
Example #60
0
 def test_add_or_replace_filter_filtersOnlyAppendsUnique(self):
     """Adding the same new filter twice appends it to the existing filter only once."""
     record = VcfRecord("chr1", "42", "XYZ", "C", vcf_filter="indelError")
     record.add_or_replace_filter("JQ_EXCLUDE")
     record.add_or_replace_filter("JQ_EXCLUDE")
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual("indelError;JQ_EXCLUDE", record.filter)