def test_writer(yrb_writer, capsys):
    """Write one three-sample variant and inspect the captured VCF text.

    The header is expected to carry two ``##FORMAT`` lines, one of which is
    the GT declaration made in this test. The single variant record is
    expected to have 12 tab-separated columns, the last four being the
    FORMAT column followed by one column per registered sample.
    """
    # NOTE(review): the incoming ``yrb_writer`` value is discarded and the
    # name is rebound to a writer built here on ``sys.stdout`` -- presumably
    # so the stream is resolved while ``capsys`` is active; confirm.
    yrb_writer = kevlar.vcf.VCFWriter(sys.stdout, source='py.test')
    for sample in ('NA19238', 'NA19239', 'NA19240'):
        yrb_writer.register_sample(sample)
    yrb_writer.describe_format('GT', 'String', '1', 'Genotype')
    yrb_writer.write_header()

    variant = Variant('1', 12345, 'G', 'C')
    variant.annotate('PART', '42')
    variant.annotate('CONTIG', 'A' * 100)

    # All genotypes are recorded first, then all abundances, sample by sample.
    genotypes = (
        ('NA19238', '0/0'),
        ('NA19239', '0/0'),
        ('NA19240', '0/1'),
    )
    abundances = (
        ('NA19238', '12,9,8'),
        ('NA19239', '0,0,0'),
        ('NA19240', '0,0,0'),
    )
    for sample, genotype in genotypes:
        variant.format(sample, 'GT', genotype)
    for sample, abundance in abundances:
        variant.format(sample, 'ALTABUND', abundance)
    yrb_writer.write(variant)

    out, _ = capsys.readouterr()
    print(out)

    # Sort the captured lines into FORMAT header lines and data records;
    # every other header line (anything else starting with '#') is ignored.
    format_lines = []
    record_lines = []
    for line in out.strip().split('\n'):
        if line.startswith('##FORMAT'):
            format_lines.append(line)
        elif not line.startswith('#'):
            record_lines.append(line)

    assert len(format_lines) == 2
    expected_gt = '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">'
    assert expected_gt in format_lines

    assert len(record_lines) == 1
    columns = record_lines[0].split('\t')
    assert len(columns) == 12
    expected_tail = ['ALTABUND:GT', '12,9,8:0/0', '0,0,0:0/0', '0,0,0:0/1']
    assert columns[8:12] == expected_tail
def test_writer_bad_fmt(yrb_writer):
    """Writing a variant with mismatched per-sample FORMAT keys must fail.

    NA19238 is given only GT, NA19239 only ALTABUND, and NA19240 both, so
    the write is expected to raise ``VariantAnnotationError``.
    """
    variant = Variant('1', 12345, 'G', 'C')
    variant.annotate('PART', '42')
    variant.annotate('CONTIG', 'A' * 100)

    # Deliberately inconsistent set of FORMAT fields across the samples.
    sample_fields = (
        ('NA19238', 'GT', '0/0'),
        ('NA19240', 'GT', '0/1'),
        ('NA19239', 'ALTABUND', '0,0,0'),
        ('NA19240', 'ALTABUND', '0,0,0'),
    )
    for sample, key, value in sample_fields:
        variant.format(sample, key, value)

    expected_message = r'samples not annotated with the same FORMAT fields'
    with pytest.raises(
        kevlar.vcf.VariantAnnotationError, match=expected_message
    ):
        yrb_writer.write(variant)
def test_info():
    """Exercise INFO-style attribute handling on ``Variant``.

    Covers ``FormattedList`` string rendering on its own, then the
    ``.annotate()`` / ``.attribute()`` round trip for string, integer, and
    float values, with and without ``replace=False``.
    """
    # --- FormattedList in isolation ---
    numbers = FormattedList()
    assert str(numbers) == '.'
    for item, rendered in ((42, '42'), (1776, '42,1776')):
        numbers.append(item)
        assert str(numbers) == rendered
    # Adding a string to the integers makes rendering raise.
    numbers.append('B0gU$')
    with pytest.raises(kevlar.vcf.KevlarMixedDataTypeError):
        str(numbers)

    variant = Variant('1', 12345, 'G', 'C')

    # --- String values: a default annotate() replaces the stored value ---
    assert variant.attribute('VW') is None
    window = 'AGTNNNNNNNNNNNNNNNNNNNNNTGA'
    variant.annotate('VW', window)
    assert variant.attribute('VW') == window
    variant.annotate('VW', 'GATTACA')
    assert variant.attribute('VW') == 'GATTACA'
    assert variant.attribute('VW', pair=True) == 'VW=GATTACA'

    # --- String values: replace=False accumulates a list ---
    variant.annotate('VW', 'ATGCCCTAG', replace=False)
    two_windows = ['GATTACA', 'ATGCCCTAG']
    assert variant.info['VW'] == two_windows
    assert variant.attribute('VW') == two_windows
    assert variant.attribute('VW', string=True) == 'GATTACA,ATGCCCTAG'
    assert variant.attribute('VW', pair=True) == 'VW=GATTACA,ATGCCCTAG'
    variant.annotate('VW', 'AAAAAAAAA', replace=False)
    assert variant.attribute('VW') == ['GATTACA', 'ATGCCCTAG', 'AAAAAAAAA']
    assert (
        variant.attribute('VW', pair=True)
        == 'VW=GATTACA,ATGCCCTAG,AAAAAAAAA'
    )

    # --- Integer values ---
    variant.annotate('DROPPED', 3)
    assert variant.attribute('DROPPED') == 3
    assert variant.attribute('DROPPED', string=True) == '3'
    variant.annotate('DROPPED', 31, replace=False)
    assert variant.attribute('DROPPED') == [3, 31]
    assert variant.attribute('DROPPED', string=True) == '3,31'
    assert variant.attribute('DROPPED', pair=True) == 'DROPPED=3,31'

    # --- Float values: expected strings carry three decimal places ---
    for distance in (432.1234, 8765.4321):
        variant.annotate('MATEDIST', distance, replace=False)
    assert variant.attribute('MATEDIST', string=True) == '432.123,8765.432'
    variant.annotate('LLIH', -436.0111857750478)
    assert variant.attribute('LLIH', pair=True) == 'LLIH=-436.011'