def test_text(self):
    # text() should render the fixed columns, the FORMAT tags, and one
    # colon-joined column per sample. Note the constructor takes REF/ALT
    # before ID, while the expected line places ID before REF/ALT.
    sample_a = OrderedDict([("F1", "SA.1"), ("F2", "SA.2"), ("F3", "SA.3")])
    sample_b = OrderedDict([("F1", "SB.1"), ("F2", "SB.2"), ("F3", "SB.3")])
    sample_tag_values = OrderedDict([("SampleA", sample_a), ("SampleB", sample_b)])
    record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
    expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
    self.assertEqual(expected, record.text())
def test_asTextWhenEmptyFormatField(self):
    # Samples that carry no tag values are expected to render "." for the
    # FORMAT column and for every sample column.
    sampleA = OrderedDict()
    sampleB = OrderedDict()
    sample_tag_values = OrderedDict({"SampleA": sampleA, "SampleB": sampleB})
    record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
    expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|.|.|.\n")
    self.assertEqual(expected, record.text())
def test_empty_record(self):
    # get_empty_record() should render the same text as a record built
    # from only chrom, pos, ref and alt of the parsed record.
    sample_names = ["SampleA"]
    parsed_line = self.entab("chr2|1|ID|A|C|QUAL|FILTER|INFO|F|S\n")
    base = VcfRecord.parse_record(parsed_line, sample_names)
    empty_record = base.get_empty_record()
    expected_record = VcfRecord(chrom="chr2", pos="1", ref="A", alt="C")
    self.assertEqual(expected_record.text(), empty_record.text())
def testHash(self):
    # Two records that differ only in INFO/FORMAT columns must hash alike
    # and therefore collapse to a single entry in a set.
    sample_names = ["sampleA"]
    base = VcfRecord.parse_record(self.entab("A|B|ID|C|D|QUAL|FILTER|INFO|F|S\n"), sample_names)
    base_equivalent = VcfRecord.parse_record(self.entab("A|B|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
    self.assertEqual(hash(base), hash(base_equivalent))
    record_set = {base, base_equivalent}
    self.assertEqual(1, len(record_set))
def test_empty_record(self):
    # The empty record derived from a parsed line should have the same
    # text as a VcfRecord constructed with chrom/pos/ref/alt only.
    sample_names = ["SampleA"]
    base = VcfRecord.parse_record(
        self.entab("chr2|1|ID|A|C|QUAL|FILTER|INFO|F|S\n"),
        sample_names,
    )
    expected_text = VcfRecord(chrom="chr2", pos="1", ref="A", alt="C").text()
    actual_text = base.get_empty_record().text()
    self.assertEqual(expected_text, actual_text)
def test_passed_tag_format(self):
    # Applies PassedTag("foo") to a two-sample record and compares the
    # result with a record parsed from the expected line.
    passed_tag = common_tags.PassedTag("foo")
    actual_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2|SA.1:SA.2|SB.1:SB.2\n")
    actual_vcf_record = VcfRecord.parse_record(actual_line, ["SA", "SB"])
    expected_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:foo_CALLER_REPORTED"
        "|SA.1:SA.2:1|SB.1:SB.2\n"
    )
    expected_vcf_record = VcfRecord.parse_record(expected_line, ["SA", "SB"])
    passed_tag.add_tag_values(actual_vcf_record)
    # NOTE(review): this compares records with ==; test_equals in this file
    # treats records differing only in INFO/FORMAT as equal, so the added
    # tag may not actually be verified here -- confirm whether comparing
    # text() was intended.
    self.assertEqual(expected_vcf_record, actual_vcf_record)
def test_asTextExpandsEmptyTrailingFormatField(self):
    # SampleB lacks tag 'b'; the expected text fills that slot with ".".
    sample_a = OrderedDict([('a', '1'), ('b', '2')])
    sample_b = OrderedDict([('a', '10')])
    sample_tag_values = OrderedDict([("SampleA", sample_a), ("SampleB", sample_b)])
    record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
    expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|a:b|1:2|10:.\n")
    self.assertEqual(expected, record.text())
def testCompare_nonNumericChrom(self):
    # Sorting the reversed list must restore the expected order, in which
    # "chr2" and "chr5" precede chromosome "10".
    sample_names = ["SampleA"]
    expected_records = [
        VcfRecord.parse_record(self.entab("chr2|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("chr5|1|ID|A|A|QUAL|FILTER||foo|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("10|1|ID|A|C|QUAL|FILTER|INFO|F|S\n"), sample_names),
    ]
    input_records = expected_records[::-1]
    self.assertEqual(expected_records, sorted(input_records))
def test_join_info_fields_nullValues(self):
    # A "." INFO column stays "." after joining; once a field is added,
    # the joined INFO is just that field.
    sample_names = ["SampleA"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|.|F|S\n")

    untouched = VcfRecord.parse_record(input_line, sample_names)
    untouched._join_info_fields()
    self.assertEqual(".", untouched.info)

    with_field = VcfRecord.parse_record(input_line, sample_names)
    with_field.add_info_field("foo")
    with_field._join_info_fields()
    self.assertEqual("foo", with_field.info)
def test_asTextWhenEmptyFormatField(self):
    # With no tag values on either sample, FORMAT and both sample columns
    # are expected to be rendered as ".".
    empty_a = OrderedDict()
    empty_b = OrderedDict()
    sample_tag_values = OrderedDict({
        "SampleA": empty_a,
        "SampleB": empty_b,
    })
    record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
    expected = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|.|.|.\n")
    self.assertEqual(expected, record.text())
def testHash(self):
    # Records parsed from lines that differ only in the INFO/FORMAT columns
    # should share a hash and occupy one slot in a set.
    sample_names = ["sampleA"]
    base = VcfRecord.parse_record(
        self.entab("A|B|ID|C|D|QUAL|FILTER|INFO|F|S\n"),
        sample_names,
    )
    base_equivalent = VcfRecord.parse_record(
        self.entab("A|B|ID|C|D|QUAL|FILTER||foo|S\n"),
        sample_names,
    )
    self.assertEqual(hash(base), hash(base_equivalent))
    record_set = set()
    for rec in (base, base_equivalent):
        record_set.add(rec)
    self.assertEqual(1, len(record_set))
def test_passed_tag_format_noPass(self):
    # After PassedTag("foo") is applied, the record text should gain the
    # JQ_foo_CALLER_PASSED tag with value 1 for both samples.
    passed_tag = common_tags.PassedTag("foo")
    actual_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|PASS|INFO|F1:F2|SA.1:SA.2|SB.1:SB.2\n")
    actual_vcf_record = VcfRecord.parse_record(actual_line, ["SA", "SB"])
    expected_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|PASS|INFO|F1:F2:JQ_foo_CALLER_PASSED"
        "|SA.1:SA.2:1|SB.1:SB.2:1\n"
    )
    expected_vcf_record = VcfRecord.parse_record(expected_line, ["SA", "SB"])
    passed_tag.add_tag_values(actual_vcf_record)
    self.assertEqual(expected_vcf_record.text(), actual_vcf_record.text())
def test_parse_record_removesNewlines(self):
    # The last sample value must come back without its line terminator,
    # whether the line ends in "\n", "\r" or "\r\n".
    sample_names = ["SampleA"]

    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SB_foo:SB_bar\n")
    record = VcfRecord.parse_record(input_line, sample_names)
    self.assertEqual("SB_bar", record.sample_tag_values["SampleA"]["BAR"])

    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SB_foo:SB_bar\r")
    record = VcfRecord.parse_record(input_line, sample_names)
    self.assertEqual("SB_bar", record.sample_tag_values["SampleA"]["BAR"])

    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SB_foo:SB_bar\r\n")
    record = VcfRecord.parse_record(input_line, sample_names)
    self.assertEqual("SB_bar", record.sample_tag_values["SampleA"]["BAR"])
def test_parse_record_removesNewlines(self):
    # For each supported line terminator, the trailing sample value must be
    # parsed without the terminator attached.
    sample_names = ["SampleA"]
    line_without_eol = "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SB_foo:SB_bar"
    for eol in ("\n", "\r", "\r\n"):
        input_line = self.entab(line_without_eol + eol)
        record = VcfRecord.parse_record(input_line, sample_names)
        self.assertEqual("SB_bar", record.sample_tag_values["SampleA"]["BAR"])
def test_format_tags(self):
    # format_tags should equal the set of tag names in the FORMAT column.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
    )
    record = VcfRecord.parse_record(input_line, sample_names)
    self.assertEqual({"F1", "F2", "F3"}, record.format_tags)
def test_format_field_preservesOrderWhenAddingNewTags(self):
    # Tags added later must be appended in insertion order (Z4 before A5),
    # not re-sorted alongside the original F3:F1:F2.
    sample_names = ["SA", "SB"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
    record = VcfRecord.parse_record(input_line, sample_names)
    record.add_sample_tag_value("Z4", {"SA": "SA.4", "SB": "SB.4"})
    record.add_sample_tag_value("A5", {"SA": "SA.A5", "SB": "SB.A5"})
    self.assertEqual("F3:F1:F2:Z4:A5", record._format_field())
def testCompare_nonNumericChrom(self):
    # Records are listed in the expected sort order ("chr2", "chr5", then
    # "10"); sorting the reversed list must reproduce that order.
    sample_names = ["SampleA"]
    ordered_lines = [
        "chr2|1|ID|A|A|QUAL|FILTER|INFO|F|S\n",
        "chr5|1|ID|A|A|QUAL|FILTER||foo|S\n",
        "10|1|ID|A|C|QUAL|FILTER|INFO|F|S\n",
    ]
    expected_records = [
        VcfRecord.parse_record(self.entab(line), sample_names)
        for line in ordered_lines
    ]
    input_records = list(reversed(expected_records))
    self.assertEqual(expected_records, sorted(input_records))
def test_parse_record_initsSampleTagValues(self):
    # parse_record should map each sample name to a dict of its
    # FORMAT tag -> value pairs.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
    record = VcfRecord.parse_record(input_line, sample_names)
    self.assertEqual(["SampleA", "SampleB"], sorted(record.sample_tag_values.keys()))
    self.assertEqual({"F1": "SA.1", "F2": "SA.2", "F3": "SA.3"}, record.sample_tag_values["SampleA"])
    self.assertEqual({"F1": "SB.1", "F2": "SB.2", "F3": "SB.3"}, record.sample_tag_values["SampleB"])
def test_add_tag_values_raisesNotImplementedError(self):
    # FakeTag subclasses AbstractJacquardTag without overriding
    # add_tag_values; calling that method is expected to raise
    # NotImplementedError.
    class FakeTag(common_tags.AbstractJacquardTag):
        def __init__(self):
            # Deliberately does not call the base initializer.
            pass

    fake_tag = FakeTag()
    add_tag_values = fake_tag.add_tag_values
    record = VcfRecord("1", "42", "A", "C")
    with self.assertRaises(NotImplementedError):
        add_tag_values(record)
def test_format_field(self):
    # _format_tag_fields() should yield the tags in the order they appear
    # in the FORMAT column (F3, F1, F2), not sorted.
    sample_names = ["SA", "SB"]
    input_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
    )
    record = VcfRecord.parse_record(input_line, sample_names)
    self.assertEqual(["F3", "F1", "F2"], list(record._format_tag_fields()))
def test_sample_tag_values_emptyDictWhenNoSampleData(self):
    # Empty FORMAT and sample columns should still yield one entry per
    # sample name, each with an empty tag dict.
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|||\n")
    record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
    self.assertEqual(["sampleA", "sampleB"], sorted(record.sample_tag_values.keys()))
    self.assertEqual({}, record.sample_tag_values["sampleA"])
    self.assertEqual({}, record.sample_tag_values["sampleB"])
def test_add_tag_values_nullValues(self):
    # The MuTect passed tag is 1 and the VarScan passed tag is "." for both
    # samples; the expected CALLERS_PASSED_LIST value is "MT" for each.
    caller_tags = (
        mutect.JQ_MUTECT_TAG, common_tags.CALLER_PASSED,
        varscan.JQ_VARSCAN_TAG, common_tags.CALLER_PASSED,
    )
    line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}|X:1:.|Y:1:.\n".format(*caller_tags)
    )
    processedVcfRecord = VcfRecord.parse_record(line, ["SA", "SB"])
    tag = summarize_caller._CallersPassedListTag()
    tag.add_tag_values(processedVcfRecord)
    expected_args = caller_tags + (summarize_caller.JQ_SUMMARY_TAG,)
    expected = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}:{}CALLERS_PASSED_LIST|X:1:.:MT|Y:1:.:MT\n".format(*expected_args)
    )
    self.assertEqual(expected, processedVcfRecord.text())
def test_add_tag_values(self):
    # Both callers report 1 for both samples; the expected summary
    # "reported" tag value appended to each sample is 2.
    caller_tags = (
        mutect.JQ_MUTECT_TAG, common_tags.CALLER_REPORTED,
        varscan.JQ_VARSCAN_TAG, common_tags.CALLER_REPORTED,
    )
    line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}|X:1:1|Y:1:1\n".format(*caller_tags)
    )
    processedVcfRecord = VcfRecord.parse_record(line, ["SA", "SB"])
    tag = summarize_caller._CallersReportedTag()
    tag.add_tag_values(processedVcfRecord)
    expected_args = caller_tags + (summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_REPORTED)
    expected = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}:{}{}:{}{}|X:1:1:2|Y:1:1:2\n".format(*expected_args)
    )
    self.assertEqual(expected, processedVcfRecord.text())
def test_add_tag_values_nullValues(self):
    # Both samples carry "." for the summary passed tag; the expected text
    # gains an INFO entry "<summary tag><samples passed>=0".
    passed_format_tag = (summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_PASSED)
    line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}|X:.|Y:.\n".format(*passed_format_tag)
    )
    processedVcfRecord = VcfRecord.parse_record(line, ["SA", "SB"])
    tag = summarize_caller._SamplesPassed()
    tag.add_tag_values(processedVcfRecord)
    expected_args = (
        summarize_caller.JQ_SUMMARY_TAG, summarize_caller.JQ_SAMPLES_PASSED,
    ) + passed_format_tag
    expected = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO;{}{}=0|JQ_DP:{}{}|X:.|Y:.\n".format(*expected_args)
    )
    self.assertEqual(expected, processedVcfRecord.text())
def testCompare_orderingByNumericChromAndPos(self):
    # Lines are listed in the expected sort order: numeric chromosomes
    # ascending (1, 2, 10, 11, 20), then M, then X. Every line uses POS 1,
    # so position ordering is not exercised by this data.
    sample_names = ["SampleA"]
    ordered_lines = [
        "1|1|ID|A|A|QUAL|FILTER|INFO|F|S\n",
        "2|1|ID|A|A|QUAL|FILTER||foo|S\n",
        "10|1|ID|A|A|QUAL|FILTER|INFO|F|S\n",
        "11|1|ID|C|A|QUAL|FILTER|INFO|F|S\n",
        "20|1|ID|A|A|QUAL|FILTER|INFO|F|S\n",
        "M|1|ID|A|A|QUAL|FILTER|INFO|F|S\n",
        "X|1|ID|A|A|QUAL|FILTER|INFO|F|S\n",
    ]
    expected_records = [
        VcfRecord.parse_record(self.entab(line), sample_names)
        for line in ordered_lines
    ]
    input_records = expected_records[::-1]
    self.assertEqual(expected_records, sorted(input_records))
def test_insert_format_field_failsOnInvalidSampleDict(self):
    # add_sample_tag_value is expected to raise KeyError when the supplied
    # dict omits a sample, swaps one for an unknown sample, or carries an
    # extra unknown sample.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
    record = VcfRecord.parse_record(input_line, sample_names)
    invalid_sample_dicts = (
        {"SampleA": 0.6},
        {"SampleA": 0.6, "SampleZ": 0.6},
        {"SampleA": 0.6, "SampleB": 0.6, "SampleZ": 0.6},
    )
    for bad_values in invalid_sample_dicts:
        self.assertRaises(KeyError, record.add_sample_tag_value, "inserted", bad_values)
def test_add_sample_format_value(self):
    # A newly added tag should be appended to FORMAT and its per-sample
    # values appended to the matching sample columns.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n")
    record = VcfRecord.parse_record(input_line, sample_names)
    record.add_sample_tag_value("inserted", {"SampleB": "insertedValueB", "SampleA": "insertedValueA"})
    expected = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3:inserted|SA.1:SA.2:SA.3:insertedValueA|SB.1:SB.2:SB.3:insertedValueB\n"
    )
    self.assertEqual(expected, record.text())
def test_add_tag_values_nonePassed(self):
    # Both samples carry 0 for the summary passed tag; the samples-passed
    # INFO entry must be present with the string value "0".
    line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|JQ_DP:{}{}|X:0|Y:0\n".format(
            summarize_caller.JQ_SUMMARY_TAG,
            summarize_caller.JQ_PASSED,
        )
    )
    vcf_record = VcfRecord.parse_record(line, ["SA", "SB"])
    tag = summarize_caller._SamplesPassed()
    tag.add_tag_values(vcf_record)
    info_tag = summarize_caller.JQ_SUMMARY_TAG + summarize_caller.JQ_SAMPLES_PASSED
    self.assertIn(info_tag, vcf_record.info_dict)
    self.assertEqual("0", vcf_record.info_dict[info_tag])
def test_format_field_preservesOrderWhenAddingNewTags(self):
    # Tags added after parsing must follow the original tags in insertion
    # order (Z4 then A5) when _format_tag_fields() is iterated.
    sample_names = ["SA", "SB"]
    input_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F3:F1:F2|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
    )
    record = VcfRecord.parse_record(input_line, sample_names)
    record.add_sample_tag_value("Z4", {"SA": "SA.4", "SB": "SB.4"})
    record.add_sample_tag_value("A5", {"SA": "SA.A5", "SB": "SB.A5"})
    self.assertEqual(["F3", "F1", "F2", "Z4", "A5"], list(record._format_tag_fields()))
def test_insert_format_field_failsOnExistingField(self):
    # Adding a tag whose name ("F1") is already in the FORMAT column is
    # expected to raise KeyError.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
    )
    record = VcfRecord.parse_record(input_line, sample_names)
    add_value = record.add_sample_tag_value
    duplicate_values = {"SampleA": 0.6, "SampleB": 0.6}
    with self.assertRaises(KeyError):
        add_value("F1", duplicate_values)
def test_sample_tag_values_whenSparseSampleData(self):
    # When FORMAT names a tag but the samples hold ".", each sample should
    # map that tag to ".".
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO|.|.\n")
    record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
    self.assertEqual(["sampleA", "sampleB"], sorted(record.sample_tag_values.keys()))
    self.assertEqual(OrderedDict({"FOO": "."}), record.sample_tag_values["sampleA"])
    self.assertEqual(OrderedDict({"FOO": "."}), record.sample_tag_values["sampleB"])
def test_add_info_field_nonAssignedField(self):
    # A value-less INFO field (parsed "baz" or added "foo") should appear
    # in info_dict mapped to its own name.
    sample_names = ["SampleA"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|k1=v1;k2=v2;baz|F|S\n")
    vcf_record = VcfRecord.parse_record(input_line, sample_names)
    vcf_record.add_info_field("foo")
    expected_info = {
        "k1": "v1",
        "k2": "v2",
        "baz": "baz",
        "foo": "foo",
    }
    self.assertEqual(expected_info, vcf_record.info_dict)
def test_sample_tag_values(self):
    # _sample_tag_values should pair each FORMAT tag with the value at the
    # same position in each sample's field.
    sample_tag_values = VcfRecord._sample_tag_values(
        ["sampleA", "sampleB"],
        "foo:bar",
        ["SA_foo:SA_bar", "SB_foo:SB_bar"],
    )
    expected_a = {"foo": "SA_foo", "bar": "SA_bar"}
    expected_b = {"foo": "SB_foo", "bar": "SB_bar"}
    self.assertEqual(expected_a, sample_tag_values["sampleA"])
    self.assertEqual(expected_b, sample_tag_values["sampleB"])
def test_parse_record(self):
    # Each of the first eight columns should land on the matching record
    # attribute (ID is exposed as vcf_id).
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SA_foo:SA_bar|SB_foo:SB_bar\n")
    record = VcfRecord.parse_record(input_line, sample_names)
    self.assertEqual("CHROM", record.chrom)
    self.assertEqual("POS", record.pos)
    self.assertEqual("ID", record.vcf_id)
    self.assertEqual("REF", record.ref)
    self.assertEqual("ALT", record.alt)
    self.assertEqual("QUAL", record.qual)
    self.assertEqual("FILTER", record.filter)
    self.assertEqual("INFO", record.info)
def test_sample_field_whenInconsistentTags(self):
    # FYI this should never happen in the wild, but I wanted to test the exception formatting.
    # Requesting only tag 'a' from SampleA, which holds both 'a' and 'b',
    # is expected to raise ValueError.
    sampleA = OrderedDict([('a', '1'), ('b', '2')])
    sampleB = OrderedDict([('a', '10')])
    sample_tag_values = OrderedDict([("SampleA", sampleA), ("SampleB", sampleB)])
    record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
    # assertRaisesRegexp was renamed assertRaisesRegex in Python 3.2 and the
    # old alias was removed in 3.12; fall back to the old name where the
    # new one does not exist (Python 2.7).
    assert_raises_regex = getattr(self, "assertRaisesRegex", None) or self.assertRaisesRegexp
    # NOTE(review): the unescaped '|' makes this pattern an alternation, so
    # matching either half of the message is enough -- confirm whether a
    # literal '|' (r'\|') was intended.
    assert_raises_regex(
        ValueError,
        r'CHROM:POS:REF:ALT|sample format tags are not consistent: requested tags \[a\] but sample has has tags \[a=1, b=2\] leaving behind \[b\]',
        record._sample_field,
        ['a'],
        'SampleA',
    )
def test_add_sample_format_value(self):
    # After add_sample_tag_value, the record text should show the new tag
    # at the end of FORMAT and each sample's value at the end of its column.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
    )
    record = VcfRecord.parse_record(input_line, sample_names)
    inserted_values = {
        "SampleB": "insertedValueB",
        "SampleA": "insertedValueA",
    }
    record.add_sample_tag_value("inserted", inserted_values)
    expected = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3:inserted|SA.1:SA.2:SA.3:insertedValueA|SB.1:SB.2:SB.3:insertedValueB\n"
    )
    self.assertEqual(expected, record.text())
def test_parse_record(self):
    # Verifies that parse_record assigns the first eight columns to the
    # corresponding attributes; the ID column is read back as vcf_id.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO:BAR|SA_foo:SA_bar|SB_foo:SB_bar\n"
    )
    record = VcfRecord.parse_record(input_line, sample_names)
    expected_by_attribute = [
        ("chrom", "CHROM"),
        ("pos", "POS"),
        ("vcf_id", "ID"),
        ("ref", "REF"),
        ("alt", "ALT"),
        ("qual", "QUAL"),
        ("filter", "FILTER"),
        ("info", "INFO"),
    ]
    for attribute, expected_value in expected_by_attribute:
        self.assertEqual(expected_value, getattr(record, attribute))
def test_text(self):
    # Builds a two-sample record with key-sorted tag dicts and checks the
    # full text rendering: fixed columns, FORMAT, then one column per sample.
    sampleA = OrderedDict(sorted({
        "F1": "SA.1",
        "F2": "SA.2",
        "F3": "SA.3",
    }.items()))
    sampleB = OrderedDict(sorted({
        "F1": "SB.1",
        "F2": "SB.2",
        "F3": "SB.3",
    }.items()))
    sample_tag_values = OrderedDict(sorted({
        "SampleA": sampleA,
        "SampleB": sampleB,
    }.items()))
    record = VcfRecord("CHROM", "POS", "REF", "ALT", "ID", "QUAL", "FILTER", "INFO", sample_tag_values)
    expected = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
    )
    self.assertEqual(expected, record.text())
def testCompare_orderingByNumericChromAndPos(self):
    # Records are declared in the expected sort order (1, 2, 10, 11, 20,
    # M, X); sorting the reversed list must reproduce it. All records use
    # POS 1, so only chromosome ordering is exercised.
    sample_names = ["SampleA"]
    expected_records = [
        VcfRecord.parse_record(self.entab("1|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("2|1|ID|A|A|QUAL|FILTER||foo|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("10|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("11|1|ID|C|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("20|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("M|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
        VcfRecord.parse_record(self.entab("X|1|ID|A|A|QUAL|FILTER|INFO|F|S\n"), sample_names),
    ]
    input_records = list(reversed(expected_records))
    self.assertEqual(expected_records, sorted(input_records))
def test_parse_record_initsSampleTagValues(self):
    # After parsing, sample_tag_values should hold one tag -> value dict
    # per sample name, populated from the FORMAT and sample columns.
    sample_names = ["SampleA", "SampleB"]
    input_line = self.entab(
        "CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|F1:F2:F3|SA.1:SA.2:SA.3|SB.1:SB.2:SB.3\n"
    )
    record = VcfRecord.parse_record(input_line, sample_names)
    parsed = record.sample_tag_values
    self.assertEqual(["SampleA", "SampleB"], sorted(parsed.keys()))
    expected_a = {"F1": "SA.1", "F2": "SA.2", "F3": "SA.3"}
    expected_b = {"F1": "SB.1", "F2": "SB.2", "F3": "SB.3"}
    self.assertEqual(expected_a, parsed["SampleA"])
    self.assertEqual(expected_b, parsed["SampleB"])
def test_equals(self):
    # Records differing only in INFO/FORMAT compare equal; changing chrom,
    # pos, ref or alt makes them unequal.
    sample_names = ["sampleA"]
    base = VcfRecord.parse_record(self.entab("A|1|ID|C|D|QUAL|FILTER|INFO|F|S\n"), sample_names)

    base_equivalent = VcfRecord.parse_record(self.entab("A|1|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
    self.assertEqual(base, base_equivalent)

    different_chrom = VcfRecord.parse_record(self.entab("Z|1|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
    self.assertNotEqual(base, different_chrom)

    different_pos = VcfRecord.parse_record(self.entab("A|2|ID|C|D|QUAL|FILTER||foo|S\n"), sample_names)
    self.assertNotEqual(base, different_pos)

    different_ref = VcfRecord.parse_record(self.entab("A|1|ID|Z|D|QUAL|FILTER||foo|S\n"), sample_names)
    self.assertNotEqual(base, different_ref)

    different_alt = VcfRecord.parse_record(self.entab("A|1|ID|C|Z|QUAL|FILTER||foo|S\n"), sample_names)
    self.assertNotEqual(base, different_alt)
def test_equals(self):
    # A record equals another that differs only in INFO/FORMAT, and is
    # unequal to records whose chrom, pos, ref or alt differs.
    sample_names = ["sampleA"]
    base = VcfRecord.parse_record(
        self.entab("A|1|ID|C|D|QUAL|FILTER|INFO|F|S\n"),
        sample_names,
    )
    base_equivalent = VcfRecord.parse_record(
        self.entab("A|1|ID|C|D|QUAL|FILTER||foo|S\n"),
        sample_names,
    )
    self.assertEqual(base, base_equivalent)

    # One line per key field changed, in order: chrom, pos, ref, alt.
    differing_lines = (
        "Z|1|ID|C|D|QUAL|FILTER||foo|S\n",
        "A|2|ID|C|D|QUAL|FILTER||foo|S\n",
        "A|1|ID|Z|D|QUAL|FILTER||foo|S\n",
        "A|1|ID|C|Z|QUAL|FILTER||foo|S\n",
    )
    for line in differing_lines:
        different = VcfRecord.parse_record(self.entab(line), sample_names)
        self.assertNotEqual(base, different)
def test_add_info_field_nonAssignedField(self):
    # INFO fields without "=" (the parsed "baz" and the added "foo") are
    # expected in info_dict keyed and valued by their own name.
    sample_names = ["SampleA"]
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|k1=v1;k2=v2;baz|F|S\n")
    vcf_record = VcfRecord.parse_record(input_line, sample_names)
    vcf_record.add_info_field("foo")
    self.assertEqual({"k1": "v1", "k2": "v2", "baz": "baz", "foo": "foo"}, vcf_record.info_dict)
def test_sample_tag_values_emptyDictWhenExplicitNullSampleData(self):
    # "." in FORMAT and in every sample column should produce an empty tag
    # dict for each sample name.
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|.|.|.\n")
    record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
    self.assertEqual(["sampleA", "sampleB"], sorted(record.sample_tag_values.keys()))
    self.assertEqual({}, record.sample_tag_values["sampleA"])
    self.assertEqual({}, record.sample_tag_values["sampleB"])
def test_join_info_fields_orderedCorrectly(self):
    # Joining INFO fields must keep the original order (FOO;BAR;BAZ)
    # rather than sorting them.
    vcf_record = VcfRecord("chr1", "2", "A", "G", info="FOO;BAR;BAZ")
    vcf_record._join_info_fields()
    self.assertEqual("FOO;BAR;BAZ", vcf_record.info)
def test_add_or_replace_filter_duplicateFilterNotAdded(self):
    # Adding a filter value the record already has must leave the filter
    # unchanged.
    record = VcfRecord("chr1", "42", "XYZ", "C", vcf_filter="JQ_EXCLUDE")
    record.add_or_replace_filter("JQ_EXCLUDE")
    self.assertEqual("JQ_EXCLUDE", record.filter)
def test_format_field_emptyWhenNoSamples(self):
    # A line with no FORMAT/sample columns, parsed with no sample names,
    # should report "." as its format field.
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO\n")
    record = VcfRecord.parse_record(input_line, [])
    self.assertEqual(".", record._format_field())
def test_sample_tag_values_preservesSampleOrder(self):
    # Parses a record with sample names given as sampleB, sampleA and
    # checks that both names are present as keys.
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|||\n")
    record = VcfRecord.parse_record(input_line, sample_names=["sampleB", "sampleA"])
    # NOTE(review): sorted() discards the key order, so this assertion does
    # not verify the order preservation the test name refers to -- confirm
    # whether comparing list(keys()) to ["sampleB", "sampleA"] was intended.
    self.assertEqual(["sampleA", "sampleB"], sorted(record.sample_tag_values.keys()))
def test_sample_tag_values(self):
    # Given sample names, a FORMAT string and per-sample fields,
    # _sample_tag_values should return a tag -> value dict per sample.
    sample_tag_values = VcfRecord._sample_tag_values(
        ["sampleA", "sampleB"], "foo:bar", ["SA_foo:SA_bar", "SB_foo:SB_bar"]
    )
    self.assertEqual({"foo": "SA_foo", "bar": "SA_bar"}, sample_tag_values["sampleA"])
    self.assertEqual({"foo": "SB_foo", "bar": "SB_bar"}, sample_tag_values["sampleB"])
def test_sample_tag_values_whenSparseSampleData(self):
    # FORMAT declares FOO while both samples hold "."; each sample should
    # end up with {"FOO": "."}.
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO|FOO|.|.\n")
    record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
    parsed = record.sample_tag_values
    self.assertEqual(["sampleA", "sampleB"], sorted(parsed.keys()))
    for sample_name in ("sampleA", "sampleB"):
        self.assertEqual(OrderedDict({"FOO": "."}), parsed[sample_name])
def test_sample_tag_values_emptyDictWhenNoSamples(self):
    # The line has no FORMAT or sample columns; even though sample names
    # are supplied, sample_tag_values is expected to be empty.
    input_line = self.entab("CHROM|POS|ID|REF|ALT|QUAL|FILTER|INFO\n")
    record = VcfRecord.parse_record(input_line, sample_names=["sampleA", "sampleB"])
    self.assertEqual({}, record.sample_tag_values)
def test_add_or_replace_filter_filterReplacesEmptyFilter(self):
    # With an empty starting filter, the new value becomes the whole
    # filter (no leading separator).
    record = VcfRecord("chr1", "42", "X", "C", vcf_filter="")
    record.add_or_replace_filter("JQ_EXCLUDE")
    self.assertEqual("JQ_EXCLUDE", record.filter)
def test_get_info_dict_null(self):
    # An INFO value of "." should produce an empty info_dict.
    vcf_record = VcfRecord("chr1", "42", "A", "C", info=".")
    self.assertEqual({}, vcf_record.info_dict)
def test_add_or_replace_filter_filtersOnlyAppendsUnique(self):
    # Adding the same filter twice to an existing filter must append it
    # only once, separated by ";".
    record = VcfRecord("chr1", "42", "XYZ", "C", vcf_filter="indelError")
    for _ in range(2):
        record.add_or_replace_filter("JQ_EXCLUDE")
    self.assertEqual("indelError;JQ_EXCLUDE", record.filter)