def test_seeding_is_consistent(self):
    """Resetting the global seed to the same value replays the same seeds.

    Draws ten seeds twice, each time right after ``hl.set_global_seed(0)``,
    and checks that the ten seeds are pairwise distinct and that both
    sequences are identical.
    """
    def draw_after_reset():
        # Reset first so each batch starts from the same generator state.
        hl.set_global_seed(0)
        return [Env.next_seed() for _ in range(10)]

    first_batch = draw_after_reset()
    second_batch = draw_after_reset()

    # All ten seeds within one batch must differ from each other ...
    self.assertEqual(len(set(first_batch)), 10)
    # ... and the second batch must reproduce the first exactly.
    self.assertEqual(first_batch, second_batch)
def test_king_homo_estimator():
    """Check a dnd-array inner product of squared genotype-score differences.

    Builds a seeded 5-sample, 5-variant Balding-Nichols dataset, stores the
    alternate-allele count of each genotype as a float entry field, and
    combines ``da.T`` with ``da`` through ``inner_product`` using a squared
    difference as the element-wise operation and summation as the reduction.
    The collected result is compared against a hard-coded 5x5 matrix.
    """
    hl.set_global_seed(1)
    dataset = hl.balding_nichols_model(2, 5, 5)
    dataset = dataset.select_entries(
        genotype_score=hl.float(dataset.GT.n_alt_alleles()))
    da = hl.experimental.dnd.array(dataset, 'genotype_score', block_size=3)

    def squared(value):
        return value * value

    def squared_difference(left, right):
        return squared(left - right)

    def add(left, right):
        return left + right

    product = da.T.inner_product(
        da,
        squared_difference,
        add,
        hl.float(0),
        hl.agg.sum,
    )
    score_difference = product.checkpoint(new_temp_file())

    # Expected values for seed 1; symmetric with a zero diagonal.
    expected = np.array([
        [0., 6., 4., 2., 4.],
        [6., 0., 6., 4., 6.],
        [4., 6., 0., 6., 0.],
        [2., 4., 6., 0., 6.],
        [4., 6., 0., 6., 0.],
    ])
    assert np.array_equal(score_difference.collect(), expected)
import hail as hl

# Script: simulate a small Balding-Nichols dataset (3 populations,
# 1024 variants, 4 samples), blank out a random subset of genotype calls,
# and export the result with hl.export_plink.

# Fix the global seed before any random expression is created.
hl.set_global_seed(0)

mt = hl.balding_nichols_model(
    n_populations=3,
    n_variants=(1 << 10),
    n_samples=4,
)

# Re-key the columns by a string ID of the form 's<sample_idx>'.
sample_id = 's' + hl.str(mt.sample_idx)
mt = mt.key_cols_by(s=sample_id)

# Keep each genotype call when the rand_bool(0.99) draw is true; otherwise
# the call becomes missing.
keep_call = hl.rand_bool(0.99)
mt = mt.annotate_entries(GT=hl.or_missing(keep_call, mt.GT))

# The family ID passed to the export is the sample ID prefixed with 'f'.
family_id = 'f' + mt.s
hl.export_plink(
    mt,
    'balding-nichols-1024-variants-4-samples-3-populations',
    fam_id=family_id,
)
h2 = 0.6 pi = 0.01 K = 0.05 n_pops = 3 fst = [0.1] * n_pops n_vars = int(100e3) n_cas_list = [5000] * 3 n_list = [2 * x for x in n_cas_list] n_sim = int(320e3) # over simulate to be able to have ascertainment else: raise ValueError(f'sim_name="{sim_name}" does not match any models') hl.set_global_seed(seed) gt_sim_suffix = f'bn.npops_{n_pops}.nvars_{n_vars}.nsim_{n_sim}' if sim_name[: 3] == 'bn_' else '' # suffix for genotype simulation (empty string if using ukb data) param_suffix = f'{gt_sim_suffix}.h2_{h2}.pi_{pi}.K_{K}.seed_{seed}' betas_path = f'{smiles_wd}/betas.{param_suffix}.tsv.gz' phens_path = f'{smiles_wd}/phens.{param_suffix}.tsv.gz' if sim_name[:3] == 'bn_': mt = hl.balding_nichols_model(n_populations=n_pops, n_samples=n_sim, n_variants=n_vars, fst=fst) mt = mt.filter_rows( (hl.abs(hl.agg.mean(mt.GT.n_alt_alleles()) / 2 - 0.5) <