def test_should_return_true_if_no_GL_or_PL_present(self):
    """A row whose sample data holds only GT reports no likelihoods."""
    vcf_schema = Schema()
    vcf_schema.set_sample_data('GT', '1', 'String', '')
    vcf_schema.samples = ['foo']

    # Single-alt row; the only per-sample key is GT (no GL, no PL).
    row = ['chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT', '0/1']
    parsed = list(generate_records(vcf_schema, row))

    first_sample_info = parsed[0].sample_info
    self.assertTrue(first_sample_info.has_no_likelihoods())
def test_should_return_false_if_one_sample_okay_for_GL(self):
    """has_no_likelihoods() is false when one sample column has real GL values."""
    vcf_schema = Schema()
    for key, number, value_type in (('GT', '1', 'String'), ('GL', 'G', 'Float')):
        vcf_schema.set_sample_data(key, number, value_type, '')
    vcf_schema.samples = ['foo']

    # NOTE(review): the row carries two sample columns while the schema
    # declares only 'foo' -- confirm how generate_records treats the extra one.
    row = [
        'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT:GL',
        '0/1:90,1,120',  # sample column with concrete likelihoods
        '0/1:.,.,.',     # sample column with every likelihood missing
    ]
    parsed = list(generate_records(vcf_schema, row))

    first_sample_info = parsed[0].sample_info
    self.assertFalse(first_sample_info.has_no_likelihoods())
def test_should_return_true_if_all_likelihoods_are_none_for_PL(self):
    """A PL field made up solely of '.' entries counts as having no likelihoods."""
    vcf_schema = Schema()
    for key, number, value_type in (('GT', '1', 'String'), ('PL', 'G', 'Float')):
        vcf_schema.set_sample_data(key, number, value_type, '')
    vcf_schema.samples = ['foo']

    row = [
        'chrZ', '200', '.', 'C', 'A', '.', 'PASS', '.', 'GT:PL',
        '0/1:.,.,.',  # every PL value is the missing marker
    ]
    parsed = list(generate_records(vcf_schema, row))

    first_sample_info = parsed[0].sample_info
    self.assertTrue(first_sample_info.has_no_likelihoods())
def test_should_parse_column_headers_with_complex_sample_names(self):
    """A sample name containing '_', '-' and '.' is read from the header intact."""
    header_lines = [
        '##fileformat=VCFv4.2\n',
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tOWEN_TOBY-RHYS.JONES\n',
    ]

    parsed_schema = VCFReader(iter(header_lines)).read_header()

    # The parsed header must equal a fresh schema that has only this sample set.
    wanted_schema = Schema()
    wanted_schema.samples = ['OWEN_TOBY-RHYS.JONES']
    self.assertEqual(wanted_schema, parsed_schema)
def test_should_warn_when_GT_is_not_present(self, log):
    """Without GT, GL values pass through untouched and one warning is logged.

    ``log`` is a captured-log object supplied from outside this method
    (presumably by a log-capturing decorator -- not visible here); only its
    ``check`` method is used.
    """
    vcf_schema = Schema()
    vcf_schema.set_sample_data('GL', 'G', 'Float', '')
    vcf_schema.samples = ['foo']

    row = ['chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GL', '1,2,3']
    parsed = list(generate_records(vcf_schema, row))

    untouched_values = ['1', '2', '3']
    for position, parsed_record in enumerate(parsed):
        observed = parsed_record.sample_info.get_field('foo', 'GL')
        # Pairing with the position makes a failure message name the record.
        self.assertEqual((position, untouched_values), (position, observed))

    expected_warning = (
        'wecall.vcfutils.fieldmetadata',
        'WARNING',
        'Unknown ploidy when parsing genotype likelihood',
    )
    log.check(expected_warning)
def test_should_drop_genotype_likelihood_with_mismatch_ploidy(self):
    """Four GL values alongside GT 0/1 and alts A,T come back as three Nones."""
    vcf_schema = Schema()
    for key, number, value_type in (('GT', '1', 'String'), ('GL', 'G', 'Float')):
        vcf_schema.set_sample_data(key, number, value_type, '')
    vcf_schema.samples = ['foo']

    row = [
        'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GT:GL',
        '0/1:1,2,3,4',
    ]
    parsed = list(generate_records(vcf_schema, row))

    dropped = [None, None, None]

    info_for_first = parsed[0].sample_info
    self.assertEqual(GenotypeCall("0/1"), info_for_first.get_field('foo', 'GT'))
    self.assertEqual(dropped, info_for_first.get_field('foo', 'GL'))

    info_for_second = parsed[1].sample_info
    self.assertEqual(GenotypeCall("0/0"), info_for_second.get_field('foo', 'GT'))
    self.assertEqual(dropped, info_for_second.get_field('foo', 'GL'))
def test_should_split_genotype_likelihood_properly(self):
    """Six GL values for alts A,T are divided between the two generated records."""
    vcf_schema = Schema()
    for key, number, value_type in (('GT', '1', 'String'), ('GL', 'G', 'Float')):
        vcf_schema.set_sample_data(key, number, value_type, '')
    vcf_schema.samples = ['foo']

    row = [
        'chrZ', '200', '.', 'C', 'A,T', '.', 'PASS', '.', 'GT:GL',
        '0/1:1,2,3,4,5,6',
    ]
    parsed = list(generate_records(vcf_schema, row))

    # (expected genotype, expected likelihoods) for record 0 and record 1.
    expectations = (
        ("0/1", [1.0, 2.0, 3.0]),
        ("0/0", [1.0, 4.0, 6.0]),
    )
    for record_index, (genotype, likelihoods) in enumerate(expectations):
        info = parsed[record_index].sample_info
        self.assertEqual(GenotypeCall(genotype), info.get_field('foo', 'GT'))
        self.assertEqual(likelihoods, info.get_field('foo', 'GL'))