def test_split_empty_sample_data_string(self):
    """Split a two-alt line whose INFO values are all '.'.

    The first split record must carry alt 'C' and be flagged as coming
    from a multi-alt line, and the first two split records must expose
    equal INFO.
    """
    schema = self.__get_example_schema("vcf_example.vcf")
    cols = """1\t11082325\tRS1\tG\tC,A\t.\t.\tPP=.;DP=.;DPR=.;DPF=.;VC=.;VCR=.;VCF=.;ABPV=.;SBPV=.;MQ=.;BR=.;QD=.\tGT:PL:GQ\t1|0:3000,0,3000:1000\t1|1:2000,0,1000:3""".split()  # noqa

    # Draw both records from ONE generator. Calling generate_records() a
    # second time restarts the iteration, so next() would hand back the
    # first record again and the INFO comparison would compare it with
    # an identical copy of itself.
    split_records = record.generate_records(schema, cols)

    first_record = next(split_records)
    self.assertEqual(first_record.alt, 'C')
    self.assertTrue(first_record.from_multi_alt)

    second_record = next(split_records)
    self.assertEqual(first_record.info, second_record.info)
def test_should_return_true_if_no_GL_or_PL_present(self):
    """A record whose only sample field is GT reports no likelihoods."""
    gt_only_schema = Schema()
    gt_only_schema.set_sample_data('GT', '1', 'String', '')
    gt_only_schema.samples = ['foo']

    vcf_columns = [
        'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT', '0/1',
    ]
    parsed = list(generate_records(gt_only_schema, vcf_columns))

    first_sample_info = parsed[0].sample_info
    self.assertTrue(first_sample_info.has_no_likelihoods())
def test_should_return_false_if_one_sample_okay_for_GL(self):
    """has_no_likelihoods() is False when one of two samples has GL values.

    The row carries two sample columns: the first has numeric GL values,
    the second has every GL value set to '.'.
    """
    schema = Schema()
    schema.set_sample_data('GT', '1', 'String', '')
    schema.set_sample_data('GL', 'G', 'Float', '')
    # One name per sample column in the row below. The row has two sample
    # columns, so a single name left the second column without a sample.
    schema.samples = ['foo', 'bar']

    records = list(generate_records(schema, [
        'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT:GL',
        '0/1:90,1,120',  # first sample: likelihoods present
        '0/1:.,.,.',  # second sample: every likelihood is '.'
    ]))

    self.assertFalse(records[0].sample_info.has_no_likelihoods())
def test_should_return_true_if_all_likelihoods_are_none_for_PL(self):
    """A sample whose PL entries are all '.' reports no likelihoods."""
    pl_schema = Schema()
    pl_schema.set_sample_data('GT', '1', 'String', '')
    pl_schema.set_sample_data('PL', 'G', 'Float', '')
    pl_schema.samples = ['foo']

    vcf_columns = [
        'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.',
        'GT:PL', '0/1:.,.,.',
    ]
    parsed = list(generate_records(pl_schema, vcf_columns))

    first_sample_info = parsed[0].sample_info
    self.assertTrue(first_sample_info.has_no_likelihoods())
def test_should_warn_about_unrecognised_key_in_monoallelic_line(self, log):
    """An INFO key missing from the schema is readable and logs a warning.

    NOTE(review): `log` is presumably a log-capture object injected by a
    decorator outside this view; only its `check` method is used here.
    """
    vcf_columns = [
        'chrZ', '200', '.', 'C', 'T', '.', 'PASS', 'NEW_KEY=value',
    ]
    parsed = list(generate_records(Schema(), vcf_columns))

    for position, parsed_record in enumerate(parsed):
        # The position is paired with the value so a failure message
        # identifies which record mismatched.
        actual = (position, parsed_record.info['NEW_KEY'])
        self.assertEqual((position, ['value']), actual)

    expected_message = 'info field {!r} not defined in schema'.format(
        'NEW_KEY')
    log.check(('root', 'WARNING', expected_message))
def test_should_warn_about_too_many_alts_in_field_of_allelic_cardinality(
        self, log):
    """Three values in a Number='A' INFO field on a two-alt line warn.

    Each generated record is expected to expose one of the first two
    values, and a single warning naming all three values must be logged.

    NOTE(review): `log` is presumably a log-capture object injected by a
    decorator outside this view; only its `check` method is used here.
    """
    per_alt_schema = Schema()
    per_alt_schema.set_info_data('key', 'A', 'String', '')

    vcf_columns = [
        'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', 'key=a,b,c',
    ]
    parsed = list(generate_records(per_alt_schema, vcf_columns))

    value_per_record = [['a'], ['b']]
    for position, parsed_record in enumerate(parsed):
        self.assertEqual(
            value_per_record[position], parsed_record.info['key'])

    expected_message = 'expected 2 items in {!r}'.format(
        [['a'], ['b'], ['c']])
    log.check(
        ('wecall.vcfutils.fieldmetadata', 'WARNING', expected_message))
def test_should_warn_when_GT_is_not_present(self, log):
    """GL without GT is returned as raw strings and logs a ploidy warning.

    NOTE(review): `log` is presumably a log-capture object injected by a
    decorator outside this view; only its `check` method is used here.
    """
    gl_only_schema = Schema()
    gl_only_schema.set_sample_data('GL', 'G', 'Float', '')
    gl_only_schema.samples = ['foo']

    vcf_columns = [
        'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GL', '1,2,3',
    ]
    parsed = list(generate_records(gl_only_schema, vcf_columns))

    for position, parsed_record in enumerate(parsed):
        # The position is paired with the value so a failure message
        # identifies which record mismatched.
        likelihoods = parsed_record.sample_info.get_field('foo', 'GL')
        self.assertEqual(
            (position, ['1', '2', '3']), (position, likelihoods))

    expected_warning = (
        'wecall.vcfutils.fieldmetadata',
        'WARNING',
        'Unknown ploidy when parsing genotype likelihood',
    )
    log.check(expected_warning)
def test_should_drop_genotype_likelihood_with_mismatch_ploidy(self):
    """Four GL values on a two-alt '0/1' call yield all-None likelihoods.

    Both split records are checked: the genotype is expected to be '0/1'
    on the first and '0/0' on the second, and GL is [None, None, None] on
    each.
    """
    schema = Schema()
    schema.set_sample_data('GT', '1', 'String', '')
    schema.set_sample_data('GL', 'G', 'Float', '')
    schema.samples = ['foo']

    vcf_columns = [
        'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.',
        'GT:GL', '0/1:1,2,3,4',
    ]
    records = list(generate_records(schema, vcf_columns))

    # (record index, expected genotype string, expected likelihoods)
    expectations = (
        (0, "0/1", [None, None, None]),
        (1, "0/0", [None, None, None]),
    )
    for position, genotype, likelihoods in expectations:
        self.assertEqual(
            GenotypeCall(genotype),
            records[position].sample_info.get_field('foo', 'GT'))
        self.assertEqual(
            likelihoods,
            records[position].sample_info.get_field('foo', 'GL'))
def test_should_split_genotype_likelihood_properly(self):
    """Six GL values on a two-alt '0/1' call are divided per split record.

    The first record is expected to keep [1.0, 2.0, 3.0] with genotype
    '0/1'; the second [1.0, 4.0, 6.0] with genotype '0/0'.
    """
    schema = Schema()
    schema.set_sample_data('GT', '1', 'String', '')
    schema.set_sample_data('GL', 'G', 'Float', '')
    schema.samples = ['foo']

    vcf_columns = [
        'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.',
        'GT:GL', '0/1:1,2,3,4,5,6',
    ]
    records = list(generate_records(schema, vcf_columns))

    # (record index, expected genotype string, expected likelihoods)
    expectations = (
        (0, "0/1", [1.0, 2.0, 3.0]),
        (1, "0/0", [1.0, 4.0, 6.0]),
    )
    for position, genotype, likelihoods in expectations:
        self.assertEqual(
            GenotypeCall(genotype),
            records[position].sample_info.get_field('foo', 'GT'))
        self.assertEqual(
            likelihoods,
            records[position].sample_info.get_field('foo', 'GL'))
def test_should_add_default_parsing_rule_for_unknown_key_in_multiallelic_line(
        self):
    """An unknown INFO key ends up in the schema with inferred metadata.

    The schema must still hold no INFO metadata once the records have
    been generated, and exactly one entry after NEW_KEY has been read
    from every record. That entry's attributes are then checked.
    """
    schema = Schema()
    vcf_columns = [
        'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', 'NEW_KEY=value',
    ]
    parsed = list(generate_records(schema, vcf_columns))

    # Checked before any record.info access on purpose.
    info_entries_before = list(schema.iter_info_data())
    self.assertEqual(0, len(info_entries_before))

    for position, parsed_record in enumerate(parsed):
        actual = (position, parsed_record.info['NEW_KEY'])
        self.assertEqual((position, ['value']), actual)

    info_entries_after = list(schema.iter_info_data())
    self.assertEqual(1, len(info_entries_after))

    inferred = schema.get_info_data('NEW_KEY')
    expected_attributes = (
        ('number', '.'),
        ('data_type', 'String'),
        ('description', 'Inferred from file content during parsing'),
        ('source', 'vcfutils'),
        ('version', 'undefined'),
    )
    for attribute, expected_value in expected_attributes:
        self.assertEqual(expected_value, getattr(inferred, attribute))