def test_header_contig_header_line():
    """Check equality, str/repr, value, serialization and hashing of ContigHeaderLine."""
    first = header.ContigHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "1"), ("length", 234)]))
    twin = header.ContigHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "1"), ("length", 234)]))
    different = header.ContigHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "2"), ("length", 123)]))

    assert first == twin
    assert first != different

    # The expected mapping text depends on the interpreter version:
    # OrderedDict([...]) form below Python 3.6, dict-literal form otherwise.
    prefix = "ContigHeaderLine('contig', '<ID=1,length=234>', "
    if sys.version_info < (3, 6):
        expected = prefix + "OrderedDict([('ID', '1'), ('length', 234)]))"
    else:
        expected = prefix + "{'ID': '1', 'length': 234})"
    assert str(first) == expected
    assert repr(first) == expected

    assert first.value == "<ID=1,length=234>"
    assert first.serialize() == "##contig=<ID=1,length=234>"

    # Hashing the header line is expected to fail with TypeError.
    with pytest.raises(TypeError):
        hash(first)
def test_header_info_header_line():
    """Check equality, str/repr, value, serialization and hashing of InfoHeaderLine."""
    first = header.InfoHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "SVTYPE"), ("Number", 1), ("Type", "String")]))
    twin = header.InfoHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "SVTYPE"), ("Number", 1), ("Type", "String")]))
    different = header.InfoHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "END"), ("Number", 1), ("Type", "Integer")]))

    assert first == twin
    assert first != different

    # The expected mapping text depends on the interpreter version:
    # OrderedDict([...]) form below Python 3.6, dict-literal form otherwise.
    prefix = "InfoHeaderLine('INFO', '<ID=SVTYPE,Number=1,Type=String>', "
    if sys.version_info < (3, 6):
        expected = prefix + (
            "OrderedDict([('ID', 'SVTYPE'), ('Number', 1), ('Type', 'String')]))")
    else:
        expected = prefix + "{'ID': 'SVTYPE', 'Number': 1, 'Type': 'String'})"
    assert str(first) == expected
    assert repr(first) == expected

    assert first.value == "<ID=SVTYPE,Number=1,Type=String>"
    assert first.serialize() == "##INFO=<ID=SVTYPE,Number=1,Type=String>"

    # Hashing the header line is expected to fail with TypeError.
    with pytest.raises(TypeError):
        hash(first)
def test_header_format_header_line():
    """Check equality, str/repr, value, serialization and hashing of FormatHeaderLine."""
    first = header.FormatHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "AD"), ("Number", "R"), ("Type", "Integer")]))
    twin = header.FormatHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "AD"), ("Number", "R"), ("Type", "Integer")]))
    different = header.FormatHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "DP"), ("Number", 1), ("Type", "Integer")]))

    assert first == twin
    assert first != different

    # The expected mapping text depends on the interpreter version:
    # OrderedDict([...]) form below Python 3.6, dict-literal form otherwise.
    prefix = "FormatHeaderLine('FORMAT', '<ID=AD,Number=R,Type=Integer>', "
    if sys.version_info < (3, 6):
        expected = prefix + (
            "OrderedDict([('ID', 'AD'), ('Number', 'R'), ('Type', 'Integer')]))")
    else:
        expected = prefix + "{'ID': 'AD', 'Number': 'R', 'Type': 'Integer'})"
    assert str(first) == expected
    assert repr(first) == expected

    assert first.value == "<ID=AD,Number=R,Type=Integer>"
    assert first.serialize() == "##FORMAT=<ID=AD,Number=R,Type=Integer>"

    # Hashing the header line is expected to fail with TypeError.
    with pytest.raises(TypeError):
        hash(first)
def test_header_sample_header_line():
    """Check equality, str/repr, value, serialization and hashing of SampleHeaderLine."""
    first = header.SampleHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "sample1")]))
    twin = header.SampleHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "sample1")]))
    different = header.SampleHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "sample2")]))

    assert first == twin
    assert first != different

    # The expected mapping text depends on the interpreter version:
    # OrderedDict([...]) form below Python 3.6, dict-literal form otherwise.
    prefix = "SampleHeaderLine('SAMPLE', '<ID=sample1>', "
    if sys.version_info < (3, 6):
        expected = prefix + "OrderedDict([('ID', 'sample1')]))"
    else:
        expected = prefix + "{'ID': 'sample1'})"
    assert str(first) == expected
    assert repr(first) == expected

    assert first.value == "<ID=sample1>"
    assert first.serialize() == "##SAMPLE=<ID=sample1>"

    # Hashing the header line is expected to fail with TypeError.
    with pytest.raises(TypeError):
        hash(first)
def test_header_pedigree_header_line():
    """Check equality, str/repr, value, serialization and hashing of PedigreeHeaderLine."""
    first = header.PedigreeHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "child"), ("Father", "father")]))
    twin = header.PedigreeHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "child"), ("Father", "father")]))
    different = header.PedigreeHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "father")]))

    assert first == twin
    assert first != different

    # The expected mapping text depends on the interpreter version:
    # OrderedDict([...]) form below Python 3.6, dict-literal form otherwise.
    prefix = "PedigreeHeaderLine('PEDIGREE', '<ID=child,Father=father>', "
    if sys.version_info < (3, 6):
        expected = prefix + "OrderedDict([('ID', 'child'), ('Father', 'father')]))"
    else:
        expected = prefix + "{'ID': 'child', 'Father': 'father'})"
    assert str(first) == expected
    assert repr(first) == expected

    assert first.value == "<ID=child,Father=father>"
    assert first.serialize() == "##PEDIGREE=<ID=child,Father=father>"

    # Hashing the header line is expected to fail with TypeError.
    with pytest.raises(TypeError):
        hash(first)
def test_header_filter_header_line():
    """Check equality, str/repr, value, serialization and hashing of FilterHeaderLine."""
    first = header.FilterHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "PASS"),
                           ("Description", "All filters passed")]))
    twin = header.FilterHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "PASS"),
                           ("Description", "All filters passed")]))
    different = header.FilterHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "q30"),
                           ("Description", "Phred score <30")]))

    assert first == twin
    assert first != different

    # The expected mapping text depends on the interpreter version:
    # OrderedDict([...]) form below Python 3.6, dict-literal form otherwise.
    prefix = (
        "FilterHeaderLine('FILTER', '<ID=PASS,Description=\"All filters passed\">', "
    )
    if sys.version_info < (3, 6):
        expected = prefix + (
            "OrderedDict([('ID', 'PASS'), ('Description', 'All filters passed')]))")
    else:
        expected = prefix + "{'ID': 'PASS', 'Description': 'All filters passed'})"
    assert str(first) == expected
    assert repr(first) == expected

    assert first.value == '<ID=PASS,Description="All filters passed">'
    serialized = first.serialize()
    assert serialized == '##FILTER=<ID=PASS,Description="All filters passed">'

    # Hashing the header line is expected to fail with TypeError.
    with pytest.raises(TypeError):
        hash(first)
def test_header_alt_allele_header_line():
    """Check equality, str/repr, value, serialization and hashing of AltAlleleHeaderLine."""
    first = header.AltAlleleHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "DEL"), ("Description", "deletion")]))
    twin = header.AltAlleleHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "DEL"), ("Description", "deletion")]))
    different = header.AltAlleleHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "DUP"), ("Description", "duplication")]))

    assert first == twin
    assert first != different

    # The expected mapping text depends on the interpreter version:
    # OrderedDict([...]) form below Python 3.6, dict-literal form otherwise.
    prefix = "AltAlleleHeaderLine('ALT', '<ID=DEL,Description=\"deletion\">', "
    if sys.version_info < (3, 6):
        expected = prefix + "OrderedDict([('ID', 'DEL'), ('Description', 'deletion')]))"
    else:
        expected = prefix + "{'ID': 'DEL', 'Description': 'deletion'})"
    assert str(first) == expected
    assert repr(first) == expected

    assert first.value == '<ID=DEL,Description="deletion">'
    assert first.serialize() == '##ALT=<ID=DEL,Description="deletion">'

    # Hashing the header line is expected to fail with TypeError.
    with pytest.raises(TypeError):
        hash(first)
def test_header_without_lines():
    """Removing two of four lines via header_without_lines leaves two lines and equal samples."""
    initial_lines = [
        header.HeaderLine("foo", "bar"),
        header.HeaderLine("foo2", "bar2"),
    ]
    sample_infos = header.SamplesInfos(["one", "two", "three"])
    full_header = header.Header(initial_lines, sample_infos)

    # Two generic lines plus two FILTER lines give four lines in total.
    for filter_id in ("PASS", "q30"):
        full_header.add_filter_line(vcfpy.OrderedDict([("ID", filter_id)]))
    assert len(full_header.lines) == 4

    to_remove = [("foo", "bar"), ("FILTER", "q30")]
    reduced = header.header_without_lines(full_header, to_remove)
    assert len(reduced.lines) == 2
    assert reduced.samples == full_header.samples
def test_add_contig_line_shortcut(vcf_header):
    """Add a contig line through add_contig_line and inspect the resulting header."""
    # check header before adding
    assert len(vcf_header.lines) == 18
    assert "20a" not in vcf_header._indices["contig"]

    # add header line
    contig_mapping = vcfpy.OrderedDict([
        ("ID", "20a"),
        ("length", 62435964),
        ("assembly", "B36"),
        ("md5", "f126cdf8a6e0c7f379d618ff66beb2da"),
        ("species", "H**o sapiens"),
        ("taxonomy", "x"),
    ])
    vcf_header.add_contig_line(contig_mapping)

    # check header after adding
    assert len(vcf_header.lines) == 19
    assert "20a" in vcf_header._indices["contig"]
    assert vcf_header._indices["contig"]["20a"] is vcf_header.lines[-1]

    # Check resulting added header line
    added = vcf_header.lines[-1]
    assert added.key == "contig"
    expected_value = ("<ID=20a,length=62435964,assembly=B36,"
                      "md5=f126cdf8a6e0c7f379d618ff66beb2da,"
                      'species="H**o sapiens",taxonomy=x>')
    assert added.value == expected_value
    assert len(added.mapping) == 6
    expected_entries = [
        ("ID", "20a"),
        ("length", 62435964),
        ("assembly", "B36"),
        ("md5", "f126cdf8a6e0c7f379d618ff66beb2da"),
        ("species", "H**o sapiens"),
        ("taxonomy", "x"),
    ]
    for field, wanted in expected_entries:
        assert added.mapping[field] == wanted
def test_add_info_line_shortcut(vcf_header):
    """Add an INFO line through add_info_line and inspect the resulting header.

    The ``vcf_header`` fixture is expected to start with 18 header lines;
    after the call there must be 19, the new line must be indexed under
    ``INFO``/``DPa`` and must carry the given mapping and serialized value.
    """
    # check header before adding
    assert len(vcf_header.lines) == 18

    # add header line
    VALUE = '<ID=DPa,Number=1,Type=Integer,Description="Total Depth">'
    mapping = vcfpy.OrderedDict([("ID", "DPa"), ("Number", 1),
                                 ("Type", "Integer"),
                                 ("Description", "Total Depth")])
    vcf_header.add_info_line(mapping)

    # check header after adding (the line count is asserted once; the
    # original repeated this identical assertion twice in a row)
    assert len(vcf_header.lines) == 19
    assert "DPa" in vcf_header._indices["INFO"]
    assert vcf_header._indices["INFO"]["DPa"] is vcf_header.lines[-1]

    # Check resulting added header line
    assert vcf_header.lines[-1].key == "INFO"
    assert vcf_header.lines[-1].value == VALUE
    assert len(vcf_header.lines[-1].mapping) == 4
    assert vcf_header.lines[-1].mapping["ID"] == "DPa"
    assert vcf_header.lines[-1].mapping["Number"] == 1
    assert vcf_header.lines[-1].mapping["Type"] == "Integer"
    assert vcf_header.lines[-1].mapping["Description"] == "Total Depth"
def test_add_format_line(vcf_header):
    """Add a hand-built FormatHeaderLine through add_line and inspect the header."""
    # check header before adding
    assert len(vcf_header.lines) == 18

    # add header line
    expected_value = '<ID=GTa,Number=1,Type=String,Description="Genotype">'
    format_mapping = vcfpy.OrderedDict([
        ("ID", "GTa"),
        ("Number", 1),
        ("Type", "String"),
        ("Description", "Genotype"),
    ])
    new_line = header.FormatHeaderLine("FORMAT", expected_value, format_mapping)
    vcf_header.add_line(new_line)

    # check header after adding
    assert len(vcf_header.lines) == 19
    assert "GTa" in vcf_header._indices["FORMAT"]
    assert vcf_header._indices["FORMAT"]["GTa"] is vcf_header.lines[-1]

    # Check resulting added header line
    added = vcf_header.lines[-1]
    assert added.key == "FORMAT"
    assert added.value == expected_value
    assert len(added.mapping) == 4
    expected_entries = [
        ("ID", "GTa"),
        ("Number", 1),
        ("Type", "String"),
        ("Description", "Genotype"),
    ]
    for field, wanted in expected_entries:
        assert added.mapping[field] == wanted
def test_header_has_header_line_positive():
    """has_header_line is truthy for each (key, ID) pair present in the header."""
    format_line = header.FormatHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "DP"), ("Number", "R"), ("Type", "Integer")]))
    info_line = header.InfoHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "AD"), ("Number", "R"), ("Type", "Integer")]))
    filter_line = header.FilterHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "PASS"),
                           ("Description", "All filters passed")]))
    contig_line = header.ContigHeaderLine.from_mapping(
        vcfpy.OrderedDict([("ID", "1"), ("length", 234)]))

    sample_infos = header.SamplesInfos(["one", "two", "three"])
    hdr = header.Header(
        [format_line, info_line, filter_line, contig_line], sample_infos)

    present = [("FORMAT", "DP"), ("INFO", "AD"), ("FILTER", "PASS"), ("contig", "1")]
    for key, line_id in present:
        assert hdr.has_header_line(key, line_id)
def build_rec(calls=None, format_extras=None):
    """Build a ``record.Record`` fixture with reference "C" and two SNV alternatives.

    The record is created with "2", 100, an empty list, "C", the
    alternatives "T" and "A", ``None``, another empty list and an empty
    ordered mapping, followed by the format keys and the calls.
    NOTE(review): the positional arguments presumably follow the VCF column
    order -- confirm against ``record.Record``.

    Args:
        calls: calls to attach to the record; any falsy value becomes ``[]``.
        format_extras: list of format keys appended after ``"GT"``; any
            falsy value becomes ``[]``.

    Returns:
        The newly constructed ``record.Record``.
    """
    call_list = calls or []
    extra_keys = format_extras or []
    alternatives = [
        record.Substitution(vcfpy.SNV, "T"),
        record.Substitution(vcfpy.SNV, "A"),
    ]
    format_keys = ["GT"] + extra_keys
    return record.Record(
        "2", 100, [], "C", alternatives, None, [], vcfpy.OrderedDict(),
        format_keys, call_list,
    )
def test_add_filter_line_shortcut(vcf_header):
    """Add a FILTER line through add_filter_line and inspect the resulting header."""
    # check header before adding
    assert len(vcf_header.lines) == 18

    # add header line
    filter_mapping = vcfpy.OrderedDict([
        ("ID", "q10a"),
        ("Description", "Quality below 10"),
    ])
    vcf_header.add_filter_line(filter_mapping)

    # check header after adding
    assert len(vcf_header.lines) == 19
    assert "q10a" in vcf_header._indices["FILTER"]
    assert vcf_header._indices["FILTER"]["q10a"] is vcf_header.lines[-1]

    # Check resulting added header line
    added = vcf_header.lines[-1]
    assert added.key == "FILTER"
    expected_value = '<ID=q10a,Description="Quality below 10">'
    assert added.value == expected_value
    assert len(added.mapping) == 2
    assert added.mapping["ID"] == "q10a"
    assert added.mapping["Description"] == "Quality below 10"
def test_gt_phase_char_slash():
    """A call with GT "0/1" reports "/" as its phase character."""
    genotype_data = vcfpy.OrderedDict([("GT", "0/1")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.gt_phase_char == "/"
def test_gt_phase_char_pipe():
    """A call with GT "0|1" reports "|" as its phase character."""
    genotype_data = vcfpy.OrderedDict([("GT", "0|1")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.gt_phase_char == "|"
def test_is_phased_false():
    """A call with GT "0/1" has ``is_phased`` exactly ``False``."""
    genotype_data = vcfpy.OrderedDict([("GT", "0/1")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.is_phased is False
def test_is_phased_mixed():
    """A call with the mixed-separator GT "0/1|2" has ``is_phased`` exactly ``True``."""
    genotype_data = vcfpy.OrderedDict([("GT", "0/1|2")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.is_phased is True
def test_gt_type_filtered_pass():
    """A call whose FT entry is ["PASS"] is not reported as filtered."""
    call_data = vcfpy.OrderedDict([("GT", "1/1"), ("FT", ["PASS"])])
    sample_call = record.Call("sample", call_data)
    assert not sample_call.is_filtered()
def test_gt_type_filtered_no_ft():
    """A call without any FT entry is not reported as filtered."""
    call_data = vcfpy.OrderedDict([("GT", "1/1")])
    sample_call = record.Call("sample", call_data)
    assert not sample_call.is_filtered()
def test_is_variant_hom_alt():
    """A call with GT "1/1" has a truthy ``is_variant``."""
    genotype_data = vcfpy.OrderedDict([("GT", "1/1")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.is_variant
def test_is_variant_hom_ref():
    """A call with GT "0/0" has a falsy ``is_variant``."""
    genotype_data = vcfpy.OrderedDict([("GT", "0/0")])
    sample_call = record.Call("sample", genotype_data)
    assert not sample_call.is_variant
def test_gt_type_het():
    """A call with GT "0|1" has ``gt_type`` equal to ``vcfpy.HET``."""
    genotype_data = vcfpy.OrderedDict([("GT", "0|1")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.gt_type == vcfpy.HET
def test_gt_bases_1_1():
    """With GT "1|1" on the build_rec record, ``gt_bases`` is ("T", "T")."""
    genotype_data = vcfpy.OrderedDict([("GT", "1|1")])
    sample_call = record.Call("sample", genotype_data)
    # The returned record is discarded; presumably constructing it links the
    # call to its alleles so gt_bases can resolve them -- verify in Record.
    build_rec([sample_call])
    assert sample_call.gt_bases == ("T", "T")
def test_gt_bases_0_2():
    """With GT "0|2" on the build_rec record, ``gt_bases`` is ("C", "A")."""
    genotype_data = vcfpy.OrderedDict([("GT", "0|2")])
    sample_call = record.Call("sample", genotype_data)
    # The returned record is discarded; presumably constructing it links the
    # call to its alleles so gt_bases can resolve them -- verify in Record.
    build_rec([sample_call])
    assert sample_call.gt_bases == ("C", "A")
def test_gt_type_hom_alt():
    """A call with GT "1/1" has ``gt_type`` equal to ``vcfpy.HOM_ALT``."""
    genotype_data = vcfpy.OrderedDict([("GT", "1/1")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.gt_type == vcfpy.HOM_ALT
def test_gt_type_hom_ref():
    """A call with GT "0/0" has ``gt_type`` equal to ``vcfpy.HOM_REF``."""
    genotype_data = vcfpy.OrderedDict([("GT", "0/0")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.gt_type == vcfpy.HOM_REF
def test_is_het_het():
    """A call with GT "0|1" has a truthy ``is_het``."""
    genotype_data = vcfpy.OrderedDict([("GT", "0|1")])
    sample_call = record.Call("sample", genotype_data)
    assert sample_call.is_het