Exemple #1
0
 def test_sample_data_copes_with_mixed_missing_values_in_PL(self):
     """PL values mixing numbers, '.' and None can be read as likelihoods.

     Both the '.' and the None placeholder are expected back as None.
     """
     name = 'sample_name'
     raw_values = [-0.1, '.', -0.2, None, -0.3]
     expected = [0.01, None, 0.02, None, 0.03]

     data = SampleData(['PL'], [name])
     data.add_sample_data(name, 'PL', raw_values)

     self.assertEqual(data.get_genotype_likelihoods(name), expected)
Exemple #2
0
 def test_should_allow_multiple_samples_for_add_sample_data(self):
     """Two samples sharing one key each keep their own stored values."""
     key = 'genotype_key1'
     # Tuples are used as templates so the stored list and the expected
     # list are always distinct objects.
     per_sample = (('sample_name1', (1,)), ('sample_name2', (3, 4)))

     data = SampleData([key], ['sample_name1', 'sample_name2'])
     for name, values in per_sample:
         data.add_sample_data(name, key, list(values))

     for name, values in per_sample:
         self.assertEqual(data.get_field(name, key), list(values))
Exemple #3
0
    def test_should_merge_genotype_call_object_in_sample_data(self):
        """Merging '0/1' into '0/1' for one sample should give '1/1'."""
        target = SampleData(['GT'], ['sample_name'])
        target.add_sample_data('sample_name', 'GT', GenotypeCall('0/1'))
        incoming = SampleData(['GT'], ['sample_name'])
        incoming.add_sample_data('sample_name', 'GT', GenotypeCall('0/1'))

        incoming_calls = incoming.genotypes()
        target.merge_genotype_calls(incoming_calls)

        merged_call = target.get_field("sample_name", "GT")
        self.assertEqual(merged_call, GenotypeCall("1/1"))
Exemple #4
0
    def test_should_fail_if_sample_data_objects_have_different_sample(self):
        """Merging calls keyed by a different sample name must raise."""
        left = SampleData(['GT'], ['sample_name_1'])
        left.add_sample_data('sample_name_1', 'GT', GenotypeCall('0/0'))
        right = SampleData(['GT'], ['sample_name_2'])
        right.add_sample_data('sample_name_2', 'GT', GenotypeCall('0/0'))

        # Resolve the method and collect the other object's calls up front,
        # so that only the merge itself runs inside the guarded block.
        merge = left.merge_genotype_calls
        foreign_calls = right.genotypes()

        with self.assertRaises(Exception):
            merge(foreign_calls)
Exemple #5
0
class TestGenotypeDataView(unittest.TestCase):
    """Checks the per-sample view returned by SampleData.get_genotype_data.

    The fixture declares the keys 'GT' and 'key' for two samples, then sets
    only 'key' on sample_name1 and only 'GT' on sample_name2, so each sample
    has exactly one field that was never assigned.
    """

    def setUp(self):
        """Build the two-sample fixture with one assigned field per sample."""
        samples = ['sample_name1', 'sample_name2']
        self.sample_data = SampleData(['GT', 'key'], samples)
        self.sample_data.add_sample_data("sample_name1", "key", [1, 2])
        self.sample_data.add_sample_data(
            "sample_name2", "GT", GenotypeCall("0/1"))

    def _view_for(self, sample_name):
        """Return the fixture's genotype-data view for ``sample_name``."""
        return self.sample_data.get_genotype_data(sample_name)

    def test_contains_method_returns_expected_value_sample1(self):
        """Declared keys are members; an unknown word and a sample name are not."""
        view = self._view_for("sample_name1")
        for absent in ("cheesecake", "sample_name1"):
            self.assertNotIn(absent, view)
        for present in ("GT", "key"):
            self.assertIn(present, view)

    def test_contains_method_returns_expected_value_sample2(self):
        """Both declared keys are members even though only 'GT' was set."""
        view = self._view_for("sample_name2")
        for present in ("GT", "key"):
            self.assertIn(present, view)

    def test_getitem_method_returns_expected_value(self):
        """Indexing gives stored values, or './.' / [] for unset fields."""
        first = self._view_for("sample_name1")
        self.assertEqual(first["GT"], GenotypeCall("./."))
        self.assertEqual(first["key"], [1, 2])

        second = self._view_for("sample_name2")
        self.assertEqual(second["GT"], GenotypeCall("0/1"))
        self.assertEqual(second["key"], [])

    def test_keys_method_returns_expected_data(self):
        """keys() lists the declared keys, in order, for both samples."""
        for sample_name in ("sample_name1", "sample_name2"):
            view = self._view_for(sample_name)
            self.assertEqual(list(view.keys()), ["GT", "key"])

    def test_values_method_returns_expected_data(self):
        """values() lists one value per declared key, in key order."""
        first_values = list(self._view_for("sample_name1").values())
        self.assertEqual(first_values, [GenotypeCall("./."), [1, 2]])

        second_values = list(self._view_for("sample_name2").values())
        self.assertEqual(second_values, [GenotypeCall("0/1"), []])
Exemple #6
0
 def test_should_add_sample_data(self):
     """A value added for a sample/key pair is returned by get_field."""
     name, key = 'sample_name', 'genotype_key1'
     data = SampleData([key], [name])
     data.add_sample_data(name, key, [1])
     stored = data.get_field(name, key)
     self.assertEqual(stored, [1])
Exemple #7
0
 def test_gets_value_for_GQ_key(self):
     """A stored GQ value is returned unchanged by get_genotype_quality."""
     name = 'sample_name'
     data = SampleData(['GQ'], [name])
     data.add_sample_data(name, 'GQ', [2.3])
     quality = data.get_genotype_quality(name)
     self.assertEqual(quality, [2.3])
Exemple #8
0
 def test_gets_exact_values_if_key_is_NV(self):
     """A stored NV value is returned unchanged by get_variant_support."""
     name = 'sample_name'
     data = SampleData(['NV'], [name])
     data.add_sample_data(name, 'NV', [100])
     support = data.get_variant_support(name)
     self.assertEqual(support, [100])
Exemple #9
0
 def test_gets_exact_values_if_key_is_NR(self):
     """A stored NR value is returned unchanged by get_read_depth."""
     name = 'sample_name'
     data = SampleData(['NR'], [name])
     data.add_sample_data(name, 'NR', [100])
     depth = data.get_read_depth(name)
     self.assertEqual(depth, [100])
Exemple #10
0
 def test_gets_dot_if_key_is_PL(self):
     """A bare '.' stored under PL is returned as '.' by the accessor."""
     name = 'sample_name'
     data = SampleData(['PL'], [name])
     data.add_sample_data(name, 'PL', '.')
     likelihoods = data.get_genotype_likelihoods(name)
     self.assertEqual(likelihoods, '.')
Exemple #11
0
 def test_gets_list_of_none_if_key_is_GL(self):
     """A GL list holding only None values is returned as a list of None."""
     name = 'sample_name'
     data = SampleData(['GL'], [name])
     data.add_sample_data(name, 'GL', [None, None, None])
     likelihoods = data.get_genotype_likelihoods(name)
     self.assertEqual(likelihoods, [None, None, None])
Exemple #12
0
 def test_gets_exact_values_if_key_is_PL(self):
     """PL [1, 2, 3] is expected back as likelihoods [-0.1, -0.2, -0.3]."""
     name = 'sample_name'
     data = SampleData(['PL'], [name])
     data.add_sample_data(name, 'PL', [1, 2, 3])
     likelihoods = data.get_genotype_likelihoods(name)
     self.assertEqual(likelihoods, [-0.1, -0.2, -0.3])
Exemple #13
0
    def test_should_write_missing_values_in_sample_data(self):
        """A sample with no data is written with missing-value placeholders.

        The first record of ``vcf_example.vcf`` has its sample info replaced
        by a three-sample SampleData in which only sample1 and sample2 are
        populated. The formatted row is expected to end with ``./.:.:.`` for
        the untouched sample3.
        """
        with VCFReaderContextManager(
                os.path.join(self.data_dir, "vcf_example.vcf")) as vcf_handler:
            first_record = next(vcf_handler.read_records())

        sample_data = SampleData(['GT', 'PL', 'GQ'],
                                 ['sample1', 'sample2', 'sample3'])

        sample_data.add_sample_data("sample1", "GT", GenotypeCall("1|0"))
        sample_data.add_sample_data("sample1", "PL", [3000, 0, 3000])
        sample_data.add_sample_data("sample1", "GQ", [1000])

        sample_data.add_sample_data("sample2", "GT", GenotypeCall("1|1"))
        sample_data.add_sample_data("sample2", "PL", [2000, 0, 1000])
        sample_data.add_sample_data("sample2", "GQ", [3])

        # sample3 is deliberately left without any data.
        first_record.sample_info = sample_data

        vcf_string = vcf_row_from_record(first_record)
        expected_vcf_string = "20	10	.	CT	C	3000	PASS	PP=3000;DP=250;DPR=140;DPF=110;VC=100;VCR=49;VCF=51;ABPV=0.2;SBPV=0.3;MQ=70.0;BR=31.0;QD=None	GT:PL:GQ	1|0:3000,0,3000:1000	1|1:2000,0,1000:3	./.:.:."  # noqa
        self.assertEqual(expected_vcf_string, vcf_string)
Exemple #14
0
    def test_read_sample_data(self):
        """Fill several fields for one sample and read them back.

        The field keys come from the sample-data schema of
        ``vcf_example.vcf``. Values are read back through ``genotypes``,
        ``get_field`` and the per-sample genotype-data view.
        """
        schema = self.__get_example_schema("vcf_example.vcf")
        field_keys = [field for field, _ in schema.iter_sample_data()]

        sample_data = SampleData(field_keys, ['sample1'])

        # Stored in this order, one add_sample_data call per entry.
        stored_fields = (
            ("GT", GenotypeCall("1|0")),
            ("PL", [3000, 0, 3000]),
            ("GQ", [1000]),
            ("PQ", [2000]),
            ("PS", [60000]),
            ("AD", [140, 110]),
            ("DP", [250]),
            ("VAF", [0.4]),
        )
        for field, value in stored_fields:
            sample_data.add_sample_data("sample1", field, value)

        self.assertTrue(sample_data.has_sample("sample1"))
        all_calls = sample_data.genotypes()
        self.assertEqual(all_calls, {"sample1": GenotypeCall("1|0")})
        stored_call = sample_data.get_field("sample1", 'GT')
        self.assertEqual(stored_call, GenotypeCall("1|0"))
        stored_pl = sample_data.get_field("sample1", 'PL')
        self.assertEqual(stored_pl, [3000, 0, 3000])

        view = sample_data.get_genotype_data("sample1")
        self.assertEqual(view.genotype(), GenotypeCall("1|0"))
        self.assertEqual(view['GT'], GenotypeCall("1|0"))
        self.assertEqual(view['PL'], [3000, 0, 3000])