def test_kosman_pairwise_by_chunk(self):
    """Check pairwise distances (default method) with and without chunking.

    Three scenarios are exercised:

    * an (11, 4, 2) genotype array, where the result must match the
      expected values both unchunked and with ``chunk_size=2``;
    * a (10, 2, 2) array filled with -1 (missing), where the first
      distance must be NaN;
    * the same array with its first five rows filled in, where the first
      distance must be exactly 1, unchunked and with ``chunk_size=3``.
    """
    # NOTE(review): another method with this same name appears later in
    # the visible source; if both belong to one class, the later
    # definition replaces this one and this test never runs -- confirm.
    first = numpy.array([[-1, -1], [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
                         [0, 1], [0, 1], [0, 0], [0, 0], [0, 1]])
    second = numpy.array([[1, 1], [-1, -1], [0, 0], [0, 0], [1, 1], [0, 1],
                          [1, 0], [1, 0], [1, 0], [0, 1], [1, 1]])
    third = numpy.ones((11, 2), dtype=numpy.int16)
    fourth = numpy.ones((11, 2), dtype=numpy.int16)

    # Stack to (4, 11, 2), then swap the first two axes to get (11, 4, 2).
    stacked = numpy.stack((first, second, third, fourth), axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)

    expected = [0.33333333, 0.75, 0.75, 0.45, 0.45, 0.]
    for chunk_size in (None, 2):
        distance = calc_pairwise_distance(variations, chunk_size=chunk_size)
        assert numpy.allclose(distance, expected)

    # With all missing
    all_missing = [numpy.full((10, 2), -1, dtype=numpy.int16)
                   for _ in range(2)]
    stacked = numpy.stack(all_missing, axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)
    distance = calc_pairwise_distance(variations)
    assert numpy.isnan(distance[0])

    # With missing in some chunks only
    variations['/calls/GT'][:5, 0, :] = 1
    variations['/calls/GT'][:5, 1, :] = 0
    unchunked = calc_pairwise_distance(variations)
    assert unchunked[0] == 1
    chunked = calc_pairwise_distance(variations, chunk_size=3)
    assert chunked[0] == 1
def test_matching_pairwise_by_chunk(self):
    """Check ``method='matching'`` distances with and without chunking.

    An (11, 4, 2) genotype array must give the expected values both
    unchunked and with ``chunk_size=2``; a (10, 2, 2) array filled with
    -1 (missing) must give NaN as its first distance.
    """
    # NOTE(review): an identical method with this same name follows in
    # the visible source; if both belong to one class, the later
    # definition replaces this one -- confirm and drop the duplicate.
    first = numpy.array([[-1, -1], [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
                         [0, 1], [0, 1], [0, 0], [0, 0], [0, 1]])
    second = numpy.array([[1, 1], [-1, -1], [0, 0], [0, 0], [1, 1], [0, 1],
                          [1, 0], [1, 0], [1, 0], [0, 1], [1, 1]])
    third = numpy.ones((11, 2), dtype=numpy.int16)
    fourth = numpy.ones((11, 2), dtype=numpy.int16)

    # Stack to (4, 11, 2), then swap the first two axes to get (11, 4, 2).
    stacked = numpy.stack((first, second, third, fourth), axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)

    expected = [0.444444, 0, 0, 0.3, 0.3, 1]
    for chunk_size in (None, 2):
        distance = calc_pairwise_distance(variations, chunk_size=chunk_size,
                                          method='matching')
        assert numpy.allclose(distance, expected)

    # With all missing
    all_missing = [numpy.full((10, 2), -1, dtype=numpy.int16)
                   for _ in range(2)]
    stacked = numpy.stack(all_missing, axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)
    distance = calc_pairwise_distance(variations, method='matching')
    assert numpy.isnan(distance[0])
def test_matching_pairwise_by_chunk(self):
    """Check ``method='matching'`` distances with and without chunking.

    An (11, 4, 2) genotype array must give the expected values both
    unchunked and with ``chunk_size=2``; a (10, 2, 2) array filled with
    -1 (missing) must give NaN as its first distance.
    """
    # NOTE(review): an identical method with this same name precedes this
    # one in the visible source; if both belong to one class, this
    # definition replaces the earlier one -- confirm and drop the duplicate.
    first = numpy.array([[-1, -1], [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
                         [0, 1], [0, 1], [0, 0], [0, 0], [0, 1]])
    second = numpy.array([[1, 1], [-1, -1], [0, 0], [0, 0], [1, 1], [0, 1],
                          [1, 0], [1, 0], [1, 0], [0, 1], [1, 1]])
    third = numpy.ones((11, 2), dtype=numpy.int16)
    fourth = numpy.ones((11, 2), dtype=numpy.int16)

    # Stack to (4, 11, 2), then swap the first two axes to get (11, 4, 2).
    stacked = numpy.stack((first, second, third, fourth), axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)

    expected = [0.444444, 0, 0, 0.3, 0.3, 1]
    for chunk_size in (None, 2):
        distance = calc_pairwise_distance(variations, chunk_size=chunk_size,
                                          method='matching')
        assert numpy.allclose(distance, expected)

    # With all missing
    all_missing = [numpy.full((10, 2), -1, dtype=numpy.int16)
                   for _ in range(2)]
    stacked = numpy.stack(all_missing, axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)
    distance = calc_pairwise_distance(variations, method='matching')
    assert numpy.isnan(distance[0])
def test_kosman_pairwise_by_chunk(self):
    """Check pairwise distances (default method), chunking and min_num_snps.

    Four scenarios are exercised:

    * an (11, 4, 2) genotype array, where the result must match the
      expected values unchunked with ``min_num_snps=1`` and with
      ``chunk_size=2``;
    * the same array with ``min_num_snps=11``, where exactly five of the
      returned distances must be NaN;
    * a (10, 2, 2) array filled with -1 (missing), where the first
      distance must be NaN;
    * that array with its first five rows filled in, where the first
      distance must be exactly 1, unchunked and with ``chunk_size=3``.
    """
    # NOTE(review): an earlier method with this same name appears in the
    # visible source; if both belong to one class, this definition
    # replaces the earlier one -- confirm and drop the stale copy.
    first = numpy.array([[-1, -1], [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
                         [0, 1], [0, 1], [0, 0], [0, 0], [0, 1]])
    second = numpy.array([[1, 1], [-1, -1], [0, 0], [0, 0], [1, 1], [0, 1],
                          [1, 0], [1, 0], [1, 0], [0, 1], [1, 1]])
    third = numpy.ones((11, 2), dtype=numpy.int16)
    fourth = numpy.ones((11, 2), dtype=numpy.int16)

    # Stack to (4, 11, 2), then swap the first two axes to get (11, 4, 2).
    stacked = numpy.stack((first, second, third, fourth), axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)

    expected = [0.33333333, 0.75, 0.75, 0.45, 0.45, 0.]
    unchunked = calc_pairwise_distance(variations, chunk_size=None,
                                       min_num_snps=1)
    assert numpy.allclose(unchunked, expected)
    chunked = calc_pairwise_distance(variations, chunk_size=2)
    assert numpy.allclose(chunked, expected)

    # Requiring 11 SNPs must leave exactly five distances as NaN.
    strict = calc_pairwise_distance(variations, chunk_size=None,
                                    min_num_snps=11)
    assert numpy.isnan(strict).sum() == 5

    # With all missing
    all_missing = [numpy.full((10, 2), -1, dtype=numpy.int16)
                   for _ in range(2)]
    stacked = numpy.stack(all_missing, axis=0)
    variations = VariationsArrays()
    variations['/calls/GT'] = stacked.transpose((1, 0, 2)).astype(numpy.int16)
    distance = calc_pairwise_distance(variations)
    assert numpy.isnan(distance[0])

    # With missing in some chunks only
    variations['/calls/GT'][:5, 0, :] = 1
    variations['/calls/GT'][:5, 1, :] = 0
    unchunked = calc_pairwise_distance(variations)
    assert unchunked[0] == 1
    chunked = calc_pairwise_distance(variations, chunk_size=3)
    assert chunked[0] == 1