def test_calc_obs_het(self):
    """Exercise calc_obs_het on empty, file-backed and hand-built inputs."""
    # Empty input arrays must yield an empty result.
    gts = numpy.array([])
    dps = numpy.array([])
    varis = {'/calls/GT': gts, '/calls/DP': dps}
    het = calc_obs_het(varis, min_num_genotypes=0)
    assert het.shape[0] == 0

    # The HDF5-backed container and the in-memory copy built from its
    # chunks must give the same result.
    hdf5 = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
    snps = VariationsArrays()
    snps.put_chunks(hdf5.iterate_chunks(kept_fields=['/calls/GT']))
    het_h5 = calc_obs_het(hdf5, min_num_genotypes=0)
    het_array = calc_obs_het(snps, min_num_genotypes=0)
    assert numpy.all(het_array == het_h5)

    # Hand-built case: two rows of four calls with matching depths.
    # NOTE(review): -1 presumably marks a missing allele -- confirm.
    gts = numpy.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                       [[0, 0], [0, 0], [0, -1], [-1, -1]]])
    dps = numpy.array([[5, 12, 10, 10],
                       [10, 10, 10, 10]])
    varis = {'/calls/GT': gts, '/calls/DP': dps}

    het = calc_obs_het(varis, min_num_genotypes=0)
    assert numpy.allclose(het, [0.5, 0])

    # With a genotype-count threshold of 10 both rows are expected to be NaN.
    # Use numpy.nan: the numpy.NaN alias was removed in NumPy 2.0.
    het = calc_obs_het(varis, min_num_genotypes=10)
    assert numpy.allclose(het, [numpy.nan, numpy.nan], equal_nan=True)

    # Depth thresholds change the expected value of the first row only.
    het = calc_obs_het(varis, min_num_genotypes=0, min_call_dp=10)
    assert numpy.allclose(het, [1, 0])

    het = calc_obs_het(varis, min_num_genotypes=0, max_call_dp=11)
    assert numpy.allclose(het, [0, 0])

    # min_call_dp=5 gives the same expected values as the unfiltered call.
    het = calc_obs_het(varis, min_num_genotypes=0, min_call_dp=5)
    assert numpy.allclose(het, [0.5, 0])
def _calc_stat(self, variations):
    """Return calc_obs_het for *variations* using this object's thresholds.

    The instance attributes ``min_num_genotypes`` and ``min_call_dp`` are
    forwarded to ``calc_obs_het`` as keyword arguments of the same name.
    """
    thresholds = {
        'min_num_genotypes': self.min_num_genotypes,
        'min_call_dp': self.min_call_dp,
    }
    return calc_obs_het(variations, **thresholds)