def test_merge_simple_var(self):
    """Merge single-base variations two-sided and one-sided.

    Three merges are run through ``VarMerger._merge_vars`` using the same
    mock merger (genotype shape ``(4, 2)``):

    * both variations present at the same position,
    * only the first variation present (second is ``None``),
    * only the second variation present (first is ``None``).

    In the one-sided cases the expected genotypes for the absent side
    are ``[-1, -1]``.
    """

    def build_vars(pos, qual, samples):
        # One-record mock container carrying an A->T variation.
        record = {'chrom': '1', 'pos': pos, 'ref': b'A', 'alt': [b'T'],
                  'gts': numpy.array([[0, 0], [1, 1]]), 'qual': qual}
        container = MockList([record])
        container.samples = samples
        return container

    def expected_var(gts, pos, qual):
        return {'gts': gts, 'pos': pos, 'ref': b'A', 'chrom': '1',
                'alt': [b'T'], 'qual': qual}

    # Both variations present at the same position.
    left = build_vars(pos=1, qual=34, samples=['a', 'b'])
    right = build_vars(pos=1, qual=35, samples=['c', 'd'])
    merger = MockMerger(gt_shape=(4, 2))
    merged = VarMerger._merge_vars(merger, left[0], right[0])
    self.var_is_equal(
        expected_var([[0, 0], [1, 1], [0, 0], [1, 1]], pos=1, qual=34),
        merged)

    # One-sided merges: the same merger instance is reused on purpose.
    left = build_vars(pos=1, qual=21, samples=['a', 'b'])
    right = build_vars(pos=2, qual=21, samples=['c', 'd'])

    merged = VarMerger._merge_vars(merger, left[0], None)
    self.var_is_equal(
        expected_var([[0, 0], [1, 1], [-1, -1], [-1, -1]], pos=1, qual=21),
        merged)

    merged = VarMerger._merge_vars(merger, None, right[0])
    self.var_is_equal(
        expected_var([[-1, -1], [-1, -1], [0, 0], [1, 1]], pos=2, qual=21),
        merged)
def test_merge_complex_var(self):
    """Merge variations whose alleles differ in length or position.

    Covers three pairs of variations:

    * same position, different alternative alleles: the alleles are
      expected to be concatenated and the second set's genotypes
      renumbered;
    * a multi-base reference at position 1 overlapping a single-base
      variation at position 2: the second variation's allele is
      expected to be rewritten against the longer reference;
    * a pair that is expected to raise ``MalformedVariationError``.
    """
    diploid_gts = [[0, 0], [1, 1]]

    def build_vars(pos, ref, alt, qual, samples):
        # One-record mock container; every record lives on chrom '1'.
        container = MockList([{'chrom': '1', 'pos': pos, 'ref': ref,
                               'alt': alt,
                               'gts': numpy.array(diploid_gts),
                               'qual': qual}])
        container.samples = samples
        return container

    # Same position, different alt alleles.
    first = build_vars(1, b'C', [b'CAAG', b'CAAA'], 21, ['a', 'b'])
    second = build_vars(1, b'C', [b'A'], None, ['c', 'd'])
    merger = MockMerger(gt_shape=(4, 2))
    merged = VarMerger._merge_vars(merger, first[0], second[0])
    self.var_is_equal(
        {'gts': [[0, 0], [1, 1], [0, 0], [3, 3]], 'pos': 1, 'ref': b'C',
         'chrom': '1', 'alt': [b'CAAG', b'CAAA', b'A'], 'qual': None},
        merged)

    # Multi-base reference overlapping a later single-base variation.
    first = build_vars(1, b'ATT', [b'T'], 21, ['a', 'b'])
    second = build_vars(2, b'T', [b'A'], None, ['c', 'd'])
    merger = MockMerger(gt_shape=(4, 2))
    merged = VarMerger._merge_vars(merger, first[0], second[0])
    self.var_is_equal(
        {'gts': [[0, 0], [1, 1], [0, 0], [2, 2]], 'pos': 1, 'ref': b'ATT',
         'chrom': '1', 'alt': [b'T', b'AAT'], 'qual': None},
        merged)

    # This pair is expected to be rejected by the merger.
    first = build_vars(1, b'C', [b'CGGT'], 21, ['a', 'b'])
    second = build_vars(2, b'C', [b'T'], None, ['c', 'd'])
    merger = MockMerger(gt_shape=(4, 2))
    try:
        # Called on the mock itself here, unlike the merges above.
        merger._merge_vars(first[0], second[0])
    except MalformedVariationError:
        pass
    else:
        self.fail('MalformedVariationError expected')
def test_merge_with_depth(self):
    """Check that per-sample depth ('dp') survives a merge.

    First merges two mock variations and expects their ``dp`` arrays to
    be concatenated. Then merges the ``format_def.h5`` test file with
    itself and checks the depth of the first merged variation, both in
    the raw merger output and after loading it into a
    ``VariationsArrays``.
    """
    genotypes = [[0, 0], [1, 1]]

    left = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                      'gts': numpy.array(genotypes),
                      'dp': numpy.array([1, 1])}])
    right = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                       'gts': numpy.array(genotypes),
                       'dp': numpy.array([20, 20])}])
    left.samples = ['a', 'b']
    right.samples = ['c', 'd']

    mock_merger = MockMerger(gt_shape=(4, 2))
    merged = VarMerger._merge_vars(mock_merger, left[0], right[0])
    expected = {'gts': [[0, 0], [1, 1], [0, 0], [1, 1]], 'pos': 1,
                'ref': b'A', 'chrom': '1', 'alt': [b'T'],
                'dp': [1, 1, 20, 20]}
    self.var_is_equal(expected, merged)

    # merge the same var with depth
    h5_path = join(TEST_DATA_DIR, 'format_def.h5')
    h5_1 = VariationsH5(h5_path, "r")
    h5_2 = VariationsH5(h5_path, "r")
    merger = VarMerger(h5_1, h5_2,
                       max_field_lens={'alt': 3},
                       ignore_complex_overlaps=True,
                       check_ref_matches=False,
                       ignore_non_matching=True)
    new_vars = VariationsArrays(ignore_overflows=True,
                                ignore_undefined_fields=True)
    first_snv_merged_depth = numpy.array([1, 8, 5, 1, 8, 5],
                                         dtype=numpy.int16)

    # The whole iterator is materialised before put_vars is called,
    # exactly as the original statement order does.
    merged_variations = list(merger.variations)
    depth = merged_variations[0][8][1]
    assert depth[0] == b'DP'
    assert numpy.all(depth[1] == first_snv_merged_depth)

    new_vars.put_vars(merger)
    assert '/calls/DP' in new_vars.keys()
    assert numpy.all(new_vars['/calls/DP'][0] == first_snv_merged_depth)
def test_ignore_non_matching(self):
    """Merge two test H5 files with ``ignore_non_matching=True``.

    After loading the merger output into a ``VariationsArrays``, exactly
    one variation is expected.
    """
    first_path = join(TEST_DATA_DIR, 'csv', 'format.h5')
    second_path = join(TEST_DATA_DIR, 'format_def.h5')
    h5_1 = VariationsH5(first_path, "r")
    h5_2 = VariationsH5(second_path, "r")

    merger_options = {
        'max_field_lens': {'alt': 3},
        'ignore_complex_overlaps': True,
        'check_ref_matches': False,
        'ignore_non_matching': True,
    }
    merger = VarMerger(h5_1, h5_2, **merger_options)

    new_vars = VariationsArrays(ignore_undefined_fields=True)
    new_vars.put_vars(merger)
    assert new_vars.num_variations == 1
def test_merge_with_depth_core_fields(self):
    """Merge variations that carry depth and check the non-depth fields.

    The input variations include a ``dp`` array, but only genotypes,
    position, reference, chromosome and alternative alleles are listed
    in the expected result.

    NOTE(review): this method used to be called ``test_merge_with_depth``,
    the same name as an earlier test in this file. Assuming both live in
    the same class, the later definition replaced the earlier one and the
    earlier test never ran; the rename lets both be collected.
    """
    vars1 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                       'gts': numpy.array([[0, 0], [1, 1]]),
                       'dp': numpy.array([1, 1])}])
    vars2 = MockList([{'chrom': '1', 'pos': 1, 'ref': b'A', 'alt': [b'T'],
                       'gts': numpy.array([[0, 0], [1, 1]]),
                       'dp': numpy.array([20, 20])}])
    vars1.samples = ['a', 'b']
    vars2.samples = ['c', 'd']

    merger = MockMerger(gt_shape=(4, 2))
    variation = VarMerger._merge_vars(merger, vars1[0], vars2[0])

    exp = {'gts': [[0, 0], [1, 1], [0, 0], [1, 1]], 'pos': 1,
           'ref': b'A', 'chrom': '1', 'alt': [b'T']}
    self.var_is_equal(exp, variation)
def test_merge_variations(self):
    """Merge two H5 test files in both orders and compare to fixtures.

    The merger output is loaded into a ``VariationsArrays`` and every
    field is compared with the matching field of ``expected_merged.h5``
    (first file, then second) or ``expected_merged2.h5`` (order swapped).
    Ploidy and the merged sample order are checked as well.

    Both passes share one comparison routine: float fields are compared
    with ``numpy.allclose`` after ``remove_nans``; all other fields must
    have identical shapes and equal contents. The shape check matters
    because ``==`` on arrays broadcasts, so a shape mismatch could
    otherwise go unnoticed.
    """

    def assert_fields_match(expected, merged):
        # Compare every merged field against the expected fixture and
        # print the offending field and arrays before re-raising.
        for field in merged.keys():
            result = merged[field][:]
            expected_values = expected[field][:]
            try:
                if 'float' in str(result.dtype):
                    assert numpy.allclose(remove_nans(expected_values),
                                          remove_nans(result))
                else:
                    if expected_values.shape != result.shape:
                        raise AssertionError(
                            'comparison failed for field: ' + field)
                    assert numpy.all(expected_values == result)
            except (AssertionError, ValueError, TypeError):
                print(field)
                print(expected_values)
                print(result)
                raise

    format_path = join(TEST_DATA_DIR, 'csv', 'format.h5')
    format_def_path = join(TEST_DATA_DIR, 'format_def.h5')

    h5_1 = VariationsH5(format_path, "r")
    h5_2 = VariationsH5(format_def_path, "r")
    merger = VarMerger(h5_1, h5_2, max_field_lens={'alt': 3},
                       ignore_complex_overlaps=True,
                       check_ref_matches=False)
    assert merger.ploidy == 2
    assert merger.samples == [b'TS-1', b'TS-11', b'TS-21',
                              b'NA00001', b'NA00002', b'NA00003']
    expected_h5 = VariationsH5(join(TEST_DATA_DIR, 'expected_merged.h5'),
                               'r')
    new_vars = VariationsArrays(ignore_undefined_fields=True)
    new_vars.put_vars(merger)
    assert_fields_match(expected_h5, new_vars)

    # Change the order
    h5_1 = VariationsH5(format_path, "r")
    h5_2 = VariationsH5(format_def_path, "r")
    merger = VarMerger(h5_2, h5_1, max_field_lens={'alt': 3},
                       ignore_complex_overlaps=True,
                       check_ref_matches=False)
    assert merger.ploidy == 2
    assert merger.samples == [b'NA00001', b'NA00002', b'NA00003',
                              b'TS-1', b'TS-11', b'TS-21']
    expected_h5 = VariationsH5(join(TEST_DATA_DIR, 'expected_merged2.h5'),
                               'r')
    new_vars = VariationsArrays(ignore_undefined_fields=True)
    new_vars.put_vars(merger)
    assert_fields_match(expected_h5, new_vars)