def test_write_annotated_record(header_samples, tmpdir_factory):
    """Write a fully annotated record and compare the file with the expected text.

    The record carries two IDs, two ALT alleles, a QUAL value, one FILTER
    entry, three INFO fields and four FORMAT fields for each of three calls.
    The file content must equal ``MEDIUM_HEADER`` followed by the record line.
    """
    O = vcfpy.OrderedDict
    # open temporary file and setup the Writer with header
    path = tmpdir_factory.mktemp("write_annotated_record").join("out.vcf")
    header, _ = header_samples
    w = writer.Writer.from_path(path, header)
    # construct record to write out from scratch
    r = record.Record(
        "20",
        100,
        ["rs333", "CSN42"],
        "C",
        [
            record.Substitution(record.SNV, "T"),
            record.Substitution(record.SNV, "G"),
        ],
        50,
        ["PASS"],
        O([("DP", 93), ("AF", [0.3, 0.2]), ("DB", True)]),
        ["GT", "DP", "GQ", "HQ"],
        [
            record.Call("NA00001", O(GT="0/1", DP=30, GQ=40, HQ=[1, 2])),
            record.Call("NA00002", O(GT="0/2", DP=31, GQ=41, HQ=[3, 4])),
            record.Call("NA00003", O(GT="1/2", DP=32, GQ=42, HQ=[5, 6])),
        ],
    )
    # write out the record, close file to ensure flushing to disk
    w.write_record(r)
    w.close()
    # compare actual result with expected
    RESULT = path.read()
    LINE = "20\t100\trs333;CSN42\tC\tT,G\t50\tPASS\tDP=93;AF=0.3,0.2;DB\tGT:DP:GQ:HQ\t0/1:30:40:1,2\t0/2:31:41:3,4\t1/2:32:42:5,6\n"
    EXPECTED = MEDIUM_HEADER + LINE
    assert EXPECTED == RESULT
def test_write_minimal_record(header_samples, tmpdir_factory):
    """Write a record with empty ID, QUAL, FILTER and INFO and verify the output.

    The expected record line uses ``.`` for each of those empty columns and
    carries only the ``GT`` FORMAT field for the three samples.
    """
    odict = vcfpy.OrderedDict
    # Writer targets a fresh file below a temporary directory
    out_path = tmpdir_factory.mktemp("write_header").join("out.vcf")
    hdr, _ = header_samples
    vcf_writer = writer.Writer.from_path(out_path, hdr)

    # One genotype-only call per sample
    genotypes = [("NA00001", "0/1"), ("NA00002", "0/0"), ("NA00003", "1/1")]
    calls = [record.Call(sample, odict(GT=gt)) for sample, gt in genotypes]
    alts = [record.Substitution(record.SNV, "T")]
    rec = record.Record("20", 100, [], "C", alts, None, [], odict(), ["GT"], calls)

    # Closing the writer ensures everything is flushed before reading back
    vcf_writer.write_record(rec)
    vcf_writer.close()

    actual = out_path.read()
    expected_line = "20\t100\t.\tC\tT\t.\t.\t.\tGT\t0/1\t0/0\t1/1\n"
    assert MEDIUM_HEADER + expected_line == actual
def test_write_record_with_escaping(header_samples, tmpdir_factory):
    """Write a record whose INFO and FORMAT values need escaping and verify it.

    Per the expected line, ``,`` and ``%`` inside values are written as
    ``%2C`` and ``%25``, the ``FT`` list entries are joined with ``;``, and an
    empty ``FT`` list is written as ``.``.
    """
    O = vcfpy.OrderedDict
    # open temporary file and setup the Writer with header
    path = tmpdir_factory.mktemp("write_header").join("out.vcf")
    header, _ = header_samples
    w = writer.Writer.from_path(path, header)
    # construct record to write out from scratch
    r = record.Record(
        "20",
        100,
        [],
        "C",
        [record.Substitution(record.SNV, "T")],
        None,
        [],
        O([("ANNO", ["Here,are%some chars", "%25"])]),
        ["GT", "FT"],
        [
            record.Call("NA00001", O(GT="0/1", FT=["%25", "FOO"])),
            record.Call("NA00002", O(GT="0/0", FT=[])),
            record.Call("NA00003", O(GT="1/1", FT=[])),
        ],
    )
    # write out the record, close file to ensure flushing to disk
    w.write_record(r)
    w.close()
    # compare actual result with expected
    RESULT = path.read()
    LINE = (
        "20\t100\t.\tC\tT\t.\t.\tANNO=Here%2Care%25some chars,"
        "%2525\tGT:FT\t0/1:%2525;FOO\t0/0:.\t1/1:.\n"
    )
    EXPECTED = MEDIUM_HEADER + LINE
    assert EXPECTED == RESULT
def test_write_minimal_record_writer_from_stream_path(header_samples, tmpdir_factory):
    """Write a minimal record through ``Writer.from_stream`` with an explicit path.

    The writer receives a binary stream plus the ``out.vcf.gz`` path of that
    stream; the written file is then verified with ``check_file``.
    """
    odict = vcfpy.OrderedDict
    # Target file inside a temporary directory
    out_path = tmpdir_factory.mktemp("write_header").join("out.vcf.gz")
    hdr, _ = header_samples
    with open(str(out_path), "wb") as stream:
        vcf_writer = writer.Writer.from_stream(stream, hdr, path=str(out_path))
        # One genotype-only call per sample
        genotypes = [("NA00001", "0/1"), ("NA00002", "0/0"), ("NA00003", "1/1")]
        calls = [record.Call(sample, odict(GT=gt)) for sample, gt in genotypes]
        alts = [record.Substitution(record.SNV, "T")]
        rec = record.Record("20", 100, [], "C", alts, None, [], odict(), ["GT"], calls)
        # Close the writer while the stream is still open so output is flushed
        vcf_writer.write_record(rec)
        vcf_writer.close()
    # Verify the record that ended up in the file
    expected_line = "20\t100\t.\tC\tT\t.\t.\t.\tGT\t0/1\t0/0\t1/1\n"
    check_file(out_path, expected_line)
def build_rec(calls=None, format_extras=None):
    """Build a record at ``2:100`` with REF ``C`` and SNV ALT alleles ``T`` and ``A``.

    Args:
        calls: Calls to attach to the record; a falsy value yields an empty list.
        format_extras: FORMAT keys appended after ``"GT"``; a falsy value
            yields no extra keys.

    Returns:
        The constructed ``record.Record`` with empty ID, FILTER and INFO.
    """
    if not calls:
        calls = []
    if not format_extras:
        format_extras = []
    alts = [record.Substitution(vcfpy.SNV, base) for base in ("T", "A")]
    return record.Record(
        "2", 100, [], "C", alts, None, [], vcfpy.OrderedDict(), ["GT"] + format_extras, calls
    )
def test_write_record_no_samples(tmpdir_factory):
    """Write a record with a sample-free header and verify the whole file.

    The header holds only a ``fileformat`` line and an empty sample list, and
    the record is built without FORMAT or calls; the expected output ends
    its column header at ``INFO``.
    """
    odict = vcfpy.OrderedDict
    # Header with a single fileformat line and no samples
    fileformat_line = header.HeaderLine("fileformat", "VCFv4.0")
    hdr = header.Header(lines=[fileformat_line], samples=header.SamplesInfos([]))
    # Record built from the eight leading arguments only
    alts = [record.Substitution(record.SNV, "T")]
    rec = record.Record("20", 100, [], "C", alts, None, [], odict())
    # Write header and record, then close so the content reaches the disk
    out_path = tmpdir_factory.mktemp("write_header").join("out.vcf")
    vcf_writer = writer.Writer.from_path(out_path, hdr)
    vcf_writer.write_record(rec)
    vcf_writer.close()
    # Compare the file content with the expected text
    actual = out_path.read()
    expected = textwrap.dedent(
        """
    ##fileformat=VCFv4.0
    #CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO
    20\t100\t.\tC\tT\t.\t.\t.
    """
    ).lstrip()
    assert actual == expected