Esempio n. 1
0
    def test_merge_simple_var(self):
        """Exercise ``VarMerger._merge_vars`` with single A>T SNVs.

        Three situations are checked: the variation is present in both
        inputs, only in the first input, and only in the second input.
        """

        def build_snvs(pos, qual, samples):
            # A new genotype array is created on every call, so no two
            # variations ever share the same array object.
            snvs = MockList([{'chrom': '1', 'pos': pos, 'ref': b'A',
                              'alt': [b'T'],
                              'gts': numpy.array([[0, 0], [1, 1]]),
                              'qual': qual}])
            snvs.samples = samples
            return snvs

        # Case 1: the SNV is found at the same position in both inputs.
        first = build_snvs(pos=1, qual=34, samples=['a', 'b'])
        second = build_snvs(pos=1, qual=35, samples=['c', 'd'])
        mock_merger = MockMerger(gt_shape=(4, 2))

        merged = VarMerger._merge_vars(mock_merger, first[0], second[0])
        # Genotypes of both inputs are expected one after the other.
        expected = {'gts': [[0, 0], [1, 1], [0, 0], [1, 1]], 'pos': 1,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T'], 'qual': 34}
        self.var_is_equal(expected, merged)

        first = build_snvs(pos=1, qual=21, samples=['a', 'b'])
        second = build_snvs(pos=2, qual=21, samples=['c', 'd'])

        # Case 2: nothing to merge from the second input; its samples are
        # expected to be filled with -1 genotypes.
        merged = VarMerger._merge_vars(mock_merger, first[0], None)
        expected = {'gts': [[0, 0], [1, 1], [-1, -1], [-1, -1]], 'pos': 1,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T'], 'qual': 21}
        self.var_is_equal(expected, merged)

        # Case 3: nothing to merge from the first input; its samples are
        # expected to be filled with -1 genotypes.
        merged = VarMerger._merge_vars(mock_merger, None, second[0])
        expected = {'gts': [[-1, -1], [-1, -1], [0, 0], [1, 1]], 'pos': 2,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T'], 'qual': 21}
        self.var_is_equal(expected, merged)
Esempio n. 2
0
    def test_merge_simple_var(self):
        """Exercise ``VarMerger._merge_vars`` with single A>T SNVs.

        Three situations are checked: the variation is present in both
        inputs, only in the first input, and only in the second input.
        """

        def build_snvs(pos, qual, samples):
            # A new genotype array is created on every call, so no two
            # variations ever share the same array object.
            snvs = MockList([{'chrom': '1', 'pos': pos, 'ref': b'A',
                              'alt': [b'T'],
                              'gts': numpy.array([[0, 0], [1, 1]]),
                              'qual': qual}])
            snvs.samples = samples
            return snvs

        # Case 1: the SNV is found at the same position in both inputs.
        first = build_snvs(pos=1, qual=34, samples=['a', 'b'])
        second = build_snvs(pos=1, qual=35, samples=['c', 'd'])
        mock_merger = MockMerger(gt_shape=(4, 2))

        merged = VarMerger._merge_vars(mock_merger, first[0], second[0])
        # Genotypes of both inputs are expected one after the other.
        expected = {'gts': [[0, 0], [1, 1], [0, 0], [1, 1]], 'pos': 1,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T'], 'qual': 34}
        self.var_is_equal(expected, merged)

        first = build_snvs(pos=1, qual=21, samples=['a', 'b'])
        second = build_snvs(pos=2, qual=21, samples=['c', 'd'])

        # Case 2: nothing to merge from the second input; its samples are
        # expected to be filled with -1 genotypes.
        merged = VarMerger._merge_vars(mock_merger, first[0], None)
        expected = {'gts': [[0, 0], [1, 1], [-1, -1], [-1, -1]], 'pos': 1,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T'], 'qual': 21}
        self.var_is_equal(expected, merged)

        # Case 3: nothing to merge from the first input; its samples are
        # expected to be filled with -1 genotypes.
        merged = VarMerger._merge_vars(mock_merger, None, second[0])
        expected = {'gts': [[-1, -1], [-1, -1], [0, 0], [1, 1]], 'pos': 2,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T'], 'qual': 21}
        self.var_is_equal(expected, merged)
Esempio n. 3
0
    def test_merge_complex_var(self):
        """Check ``VarMerger._merge_vars`` when the alleles do not match.

        Two merges are expected to succeed and are compared with
        ``var_is_equal``; a third one is expected to raise
        ``MalformedVariationError``.
        """
        # Same position and reference, different alternative alleles: the
        # alt lists are expected to be concatenated and the genotype of
        # the second input re-indexed to the new allele (1 -> 3).
        vars1 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'C',
                           'alt': [b'CAAG', b'CAAA'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': 21}])
        vars2 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'C', 'alt': [b'A'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': None}])
        vars1.samples = ['a', 'b']
        vars2.samples = ['c', 'd']
        merger = MockMerger(gt_shape=(4, 2))
        variation = VarMerger._merge_vars(merger, vars1[0], vars2[0])
        exp = {'gts': [[0, 0], [1, 1], [0, 0], [3, 3]], 'pos': 1,
               'ref': b'C', 'chrom': '1', 'alt': [b'CAAG', b'CAAA', b'A'],
               'qual': None}
        self.var_is_equal(exp, variation)

        # ATT>T at pos 1 together with T>A at pos 2: the second allele is
        # expected to be rewritten against the longer reference (AAT).
        vars1 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'ATT',
                           'alt': [b'T'], 'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': 21}])
        vars2 = MockList([{'chrom': '1', 'pos': 2, 'ref': b'T', 'alt': [b'A'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': None}])
        vars1.samples = ['a', 'b']
        vars2.samples = ['c', 'd']
        merger = MockMerger(gt_shape=(4, 2))
        variation = VarMerger._merge_vars(merger, vars1[0], vars2[0])
        exp = {'gts': [[0, 0], [1, 1], [0, 0], [2, 2]], 'pos': 1,
               'ref': b'ATT', 'chrom': '1', 'alt': [b'T', b'AAT'],
               'qual': None}
        self.var_is_equal(exp, variation)

        # This pair must be rejected by the merger.
        vars1 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'C',
                           'alt': [b'CGGT'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': 21}])
        vars2 = MockList([{'chrom': '1', 'pos': 2, 'ref': b'C',
                           'alt': [b'T'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': None}])
        vars1.samples = ['a', 'b']
        vars2.samples = ['c', 'd']
        merger = MockMerger(gt_shape=(4, 2))
        # NOTE(review): this call goes through the mock instance, unlike the
        # VarMerger._merge_vars(merger, ...) calls above -- confirm that
        # MockMerger exposes _merge_vars.
        with self.assertRaises(MalformedVariationError):
            merger._merge_vars(vars1[0], vars2[0])
Esempio n. 4
0
    def test_merge_complex_var(self):
        """Check ``VarMerger._merge_vars`` when the alleles do not match.

        Two merges are expected to succeed and are compared with
        ``var_is_equal``; a third one is expected to raise
        ``MalformedVariationError``.
        """
        # Same position and reference, different alternative alleles: the
        # alt lists are expected to be concatenated and the genotype of
        # the second input re-indexed to the new allele (1 -> 3).
        vars1 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'C',
                           'alt': [b'CAAG', b'CAAA'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': 21}])
        vars2 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'C', 'alt': [b'A'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': None}])
        vars1.samples = ['a', 'b']
        vars2.samples = ['c', 'd']
        merger = MockMerger(gt_shape=(4, 2))
        variation = VarMerger._merge_vars(merger, vars1[0], vars2[0])
        exp = {'gts': [[0, 0], [1, 1], [0, 0], [3, 3]], 'pos': 1,
               'ref': b'C', 'chrom': '1', 'alt': [b'CAAG', b'CAAA', b'A'],
               'qual': None}
        self.var_is_equal(exp, variation)

        # ATT>T at pos 1 together with T>A at pos 2: the second allele is
        # expected to be rewritten against the longer reference (AAT).
        vars1 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'ATT',
                           'alt': [b'T'], 'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': 21}])
        vars2 = MockList([{'chrom': '1', 'pos': 2, 'ref': b'T', 'alt': [b'A'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': None}])
        vars1.samples = ['a', 'b']
        vars2.samples = ['c', 'd']
        merger = MockMerger(gt_shape=(4, 2))
        variation = VarMerger._merge_vars(merger, vars1[0], vars2[0])
        exp = {'gts': [[0, 0], [1, 1], [0, 0], [2, 2]], 'pos': 1,
               'ref': b'ATT', 'chrom': '1', 'alt': [b'T', b'AAT'],
               'qual': None}
        self.var_is_equal(exp, variation)

        # This pair must be rejected by the merger.
        vars1 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'C',
                           'alt': [b'CGGT'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': 21}])
        vars2 = MockList([{'chrom': '1', 'pos': 2, 'ref': b'C',
                           'alt': [b'T'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'qual': None}])
        vars1.samples = ['a', 'b']
        vars2.samples = ['c', 'd']
        merger = MockMerger(gt_shape=(4, 2))
        # NOTE(review): this call goes through the mock instance, unlike the
        # VarMerger._merge_vars(merger, ...) calls above -- confirm that
        # MockMerger exposes _merge_vars.
        with self.assertRaises(MalformedVariationError):
            merger._merge_vars(vars1[0], vars2[0])
Esempio n. 5
0
    def test_merge_with_depth(self):
        """Check that per-sample depth values are carried through a merge.

        A pair of mock variations is merged directly first; afterwards the
        same HDF5 file is merged with itself through ``VarMerger`` and the
        DP calls of the first merged variation are inspected.
        """
        # --- mock variations -------------------------------------------
        left = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                          'gts': numpy.array([[0, 0], [1, 1]]),
                          'dp': numpy.array([1, 1])}])
        left.samples = ['a', 'b']
        right = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'dp': numpy.array([20, 20])}])
        right.samples = ['c', 'd']
        mock_merger = MockMerger(gt_shape=(4, 2))

        merged = VarMerger._merge_vars(mock_merger, left[0], right[0])
        # Depths of both inputs are expected one after the other.
        expected = {'gts': [[0, 0], [1, 1], [0, 0], [1, 1]], 'pos': 1,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T'],
                    'dp': [1, 1, 20, 20]}
        self.var_is_equal(expected, merged)

        # --- the same HDF5 file merged with itself ----------------------
        h5_path = join(TEST_DATA_DIR, 'format_def.h5')
        h5_a = VariationsH5(h5_path, "r")
        h5_b = VariationsH5(h5_path, "r")
        merge_options = {'max_field_lens': {'alt': 3},
                         'ignore_complex_overlaps': True,
                         'check_ref_matches': False,
                         'ignore_non_matching': True}
        var_merger = VarMerger(h5_a, h5_b, **merge_options)
        merged_vars = VariationsArrays(ignore_overflows=True,
                                       ignore_undefined_fields=True)

        expected_depth = numpy.array([1, 8, 5, 1, 8, 5], dtype=numpy.int16)

        # The assertions below show this entry is a (name, values) pair
        # for DP.
        first_variation = list(var_merger.variations)[0]
        depth_entry = first_variation[8][1]
        assert depth_entry[0] == b'DP'
        assert numpy.all(depth_entry[1] == expected_depth)

        merged_vars.put_vars(var_merger)
        assert '/calls/DP' in merged_vars.keys()
        assert numpy.all(merged_vars['/calls/DP'][0] == expected_depth)
Esempio n. 6
0
    def test_ignore_non_matching(self):
        """Merge two HDF5 files with ``ignore_non_matching=True`` and check
        that exactly one variation ends up in the merged result.
        """
        csv_h5 = VariationsH5(join(TEST_DATA_DIR, 'csv', 'format.h5'), "r")
        format_def_h5 = VariationsH5(join(TEST_DATA_DIR, 'format_def.h5'),
                                     "r")
        merge_options = {'max_field_lens': {'alt': 3},
                         'ignore_complex_overlaps': True,
                         'check_ref_matches': False,
                         'ignore_non_matching': True}
        var_merger = VarMerger(csv_h5, format_def_h5, **merge_options)

        merged_vars = VariationsArrays(ignore_undefined_fields=True)
        merged_vars.put_vars(var_merger)

        assert merged_vars.num_variations == 1
Esempio n. 7
0
    def test_merge_with_depth(self):
        """Merge two mock SNVs that carry a 'dp' field.

        The expected dictionary only lists genotypes, position, reference,
        chromosome and alternative alleles.
        """
        left = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                          'gts': numpy.array([[0, 0], [1, 1]]),
                          'dp': numpy.array([1, 1])}])
        left.samples = ['a', 'b']
        right = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                           'gts': numpy.array([[0, 0], [1, 1]]),
                           'dp': numpy.array([20, 20])}])
        right.samples = ['c', 'd']
        mock_merger = MockMerger(gt_shape=(4, 2))

        merged = VarMerger._merge_vars(mock_merger, left[0], right[0])
        # Genotypes of both inputs are expected one after the other.
        expected = {'gts': [[0, 0], [1, 1], [0, 0], [1, 1]], 'pos': 1,
                    'ref': b'A', 'chrom': '1', 'alt': [b'T']}
        self.var_is_equal(expected, merged)
Esempio n. 8
0
    def test_merge_variations(self):
        """Merge two HDF5 variation sets, in both orders, and compare every
        merged field with a stored expected result.
        """

        def assert_fields_match(expected_vars, merged_vars):
            # Compare every field of merged_vars with the same field of
            # expected_vars, printing both arrays when they differ.
            for field in merged_vars.keys():
                result = merged_vars[field][:]
                expected = expected_vars[field][:]
                if 'float' in str(result.dtype):
                    # Tolerance-based comparison once remove_nans has
                    # processed both arrays.
                    assert numpy.allclose(remove_nans(expected),
                                          remove_nans(result))
                    continue
                try:
                    # Checked explicitly so that a shape mismatch cannot be
                    # hidden by broadcasting in the comparison below.
                    if expected.shape != result.shape:
                        raise AssertionError('comparison failed for field: '
                                             + field)
                    assert numpy.all(expected == result)
                except (AssertionError, ValueError, TypeError):
                    print(field)
                    print(expected)
                    print(result)
                    raise

        h5_1 = VariationsH5(join(TEST_DATA_DIR, 'csv', 'format.h5'), "r")
        h5_2 = VariationsH5(join(TEST_DATA_DIR, 'format_def.h5'), "r")
        merger = VarMerger(h5_1, h5_2, max_field_lens={'alt': 3},
                           ignore_complex_overlaps=True,
                           check_ref_matches=False)
        assert merger.ploidy == 2
        assert merger.samples == [b'TS-1', b'TS-11', b'TS-21', b'NA00001',
                                  b'NA00002', b'NA00003']
        expected_h5 = VariationsH5(join(TEST_DATA_DIR, 'expected_merged.h5'),
                                   'r')
        new_vars = VariationsArrays(ignore_undefined_fields=True)
        new_vars.put_vars(merger)
        assert_fields_match(expected_h5, new_vars)

        # Change the order
        h5_1 = VariationsH5(join(TEST_DATA_DIR, 'csv', 'format.h5'), "r")
        h5_2 = VariationsH5(join(TEST_DATA_DIR, 'format_def.h5'), "r")
        merger = VarMerger(h5_2, h5_1, max_field_lens={'alt': 3},
                           ignore_complex_overlaps=True,
                           check_ref_matches=False)
        assert merger.ploidy == 2
        assert merger.samples == [b'NA00001', b'NA00002', b'NA00003',
                                  b'TS-1', b'TS-11', b'TS-21']
        expected_h5 = VariationsH5(join(TEST_DATA_DIR, 'expected_merged2.h5'),
                                   'r')
        new_vars = VariationsArrays(ignore_undefined_fields=True)
        new_vars.put_vars(merger)
        assert_fields_match(expected_h5, new_vars)