def test_fit_lmm_block(self):
    """Check fit_lmm_block on a single-column variant matrix.

    Fits the block built from the first rows of K against the binary
    phenotype and compares the first entry of each returned statistic
    with a hard-coded reference value, then checks two failure modes.
    """
    phenotypes = pd.read_csv(P_BINARY, index_col=0, sep='\t')['binary']
    no_covariates = pd.DataFrame([])
    _, lmm, h2 = initialise_lmm(phenotypes, no_covariates, S,
                                lmm_cache_in=None,
                                lmm_cache_out=None)

    # one variant only: truncate to the number of phenotype rows and
    # turn the vector into a (n, 1) column matrix
    n_samples = phenotypes.shape[0]
    variant_mat = np.loadtxt(K)[:n_samples].reshape(-1, 1)

    fitted = fit_lmm_block(lmm, h2, variant_mat)
    reference = (
        ('beta', 0.15136876),
        ('bse', 0.14208536),
        ('frac_h2', 0.15198184),
        ('p_values', 0.29205322),
    )
    for key, value in reference:
        self.assertAlmostEqual(fitted[key][0], value)

    # impossibly high h2
    with self.assertRaises(KeyError):
        fit_lmm_block(lmm, 1, variant_mat)

    # shape mismatch
    with self.assertRaises(AssertionError):
        fit_lmm_block(lmm, h2, variant_mat[:10])
def test_initialise_lmm(self):
    """Exercise initialise_lmm with and without covariates and caches.

    Covers: plain initialisation, covariates, phenotype sample names
    that do not match, writing then reading the cache file C, a cache
    that does not fit the phenotypes, and lineage sample lists.
    """
    def check_reference_fit(fit):
        # the three checks shared by every successful initialisation
        samples, lmm, h2 = fit
        self.assertEqual(samples.shape[0], 50)
        self.assertAlmostEqual(lmm.findH2()['nLL'][0], 35.7033778)
        self.assertAlmostEqual(h2, 0.0)

    p = pd.read_csv(P_BINARY, index_col=0, sep='\t')['binary']
    cov = pd.DataFrame([])
    check_reference_fit(
        initialise_lmm(p, cov, S, lmm_cache_in=None, lmm_cache_out=None)
    )

    # covariates
    cov = pd.read_csv(COV, index_col=0, header=None, sep='\t')
    check_reference_fit(
        initialise_lmm(p, cov, S, lmm_cache_in=None, lmm_cache_out=None)
    )

    # sample names not matching
    unknown_names = ['test_%d' % i for i in range(100)]
    b = pd.Series(np.random.random(100), index=unknown_names)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        x, y, z = initialise_lmm(b, cov, S,
                                 lmm_cache_in=None,
                                 lmm_cache_out=None)
        self.assertEqual(x.shape[0], 0)
        self.assertTrue(not np.isfinite(y.findH2()['nLL'][0]))
        self.assertAlmostEqual(z, 0.0)

    # save cache
    initialise_lmm(p, cov, S, lmm_cache_in=None, lmm_cache_out=C)
    # load cache
    check_reference_fit(
        initialise_lmm(p, cov, S, lmm_cache_in=C, lmm_cache_out=None)
    )

    # different sizes
    unknown_names = ['test_%d' % i for i in range(10)]
    b = pd.Series(np.random.random(10), index=unknown_names)
    with self.assertRaises(SystemExit) as cm:
        initialise_lmm(b, cov, S, lmm_cache_in=C, lmm_cache_out=None)
    self.assertEqual(cm.exception.code, 1)

    # matching lineage samples
    cov = pd.DataFrame([])
    s = pd.read_csv(S, index_col=0, sep='\t')
    initialise_lmm(p, cov, S,
                   lmm_cache_in=None, lmm_cache_out=None,
                   lineage_samples=s.index)

    # non-matching lineage samples
    with self.assertRaises(SystemExit):
        initialise_lmm(p, cov, S,
                       lmm_cache_in=None, lmm_cache_out=None,
                       lineage_samples=s.index[:-1])
def test_fit_lmm(self):
    """Run fit_lmm on one variant under several settings.

    Each scenario builds a fresh input LMM record, fits it, and compares
    the output with a hard-coded expected LMM record through eq_lmm,
    which is expected to return an empty set.
    """
    p = pd.read_csv(P_BINARY, index_col=0, sep='\t')['binary']
    cov = pd.DataFrame([])
    _, lmm, h2 = initialise_lmm(p, cov, S,
                                lmm_cache_in=None,
                                lmm_cache_out=None)

    all_nan = (np.nan,) * 6
    # statistics shared by every scenario that fits the K variant fully
    # (all but the first and last entries)
    fitted_core = (0.2920532220978148, 0.1513687600644123,
                   0.1420853593711293, 0.1519818397711344)

    def make_lmm(pattern, stats, notes, flags):
        # assemble an LMM record; the name lists are rebuilt on each
        # call so no two records share a list object
        k_names = ['k%d' % i for i in range(p[p == 1].shape[0])]
        nk_names = ['nk%d' % i for i in range(p[p == 0].shape[0])]
        fields = (('variant', pattern, 0.2) + tuple(stats) +
                  (k_names, nk_names, notes) + tuple(flags))
        return LMM(*fields)

    def unfitted(pattern='pattern'):
        # input record: six NaN statistics, no notes, both flags True
        return make_lmm(pattern, all_nan, set(), (True, True))

    def load_k():
        return np.loadtxt(K)[:p.shape[0]]

    def fit_single(variant, presence, *settings):
        # wrap the single variant the way fit_lmm is called here
        variants = [(variant, p.values, presence)]
        variant_mat = presence.reshape(-1, 1)
        return fit_lmm(lmm, h2, variants, variant_mat, *settings)

    def check(results, wanted):
        for got, want in zip(results, [wanted]):
            self.assertEqual(eq_lmm(got, want), set())

    # plain fit
    var = unfitted()
    k = load_k()
    results = fit_single(var, k, False, [], cov, False, 1, 1)
    check(results,
          make_lmm('pattern',
                   (0.28252075514059294,) + fitted_core + (None,),
                   set(), (False, False)))

    # af filtering (pattern is None; same k vector as above)
    var = unfitted(None)
    results = fit_single(var, k, False, [], cov, False, 1, 1)
    check(results,
          make_lmm(None, all_nan, {'af-filter'}, (True, False)))

    # bad-chisq
    var = unfitted()
    bad_k = np.array([1] * 5 + [0] * (p.shape[0] - 5))
    results = fit_single(var, bad_k, False, [], cov, False, 1, 1)
    check(results,
          make_lmm('pattern',
                   (0.2544505826463333, 0.263519965703956,
                    0.2666666666666663, 0.2357022603955158,
                    0.16116459280507586, None),
                   {'bad-chisq'}, (False, False)))

    # pre-filtering
    var = unfitted()
    k = load_k()
    results = fit_single(var, k, False, [], cov, False, 0.05, 1)
    check(results,
          make_lmm('pattern',
                   (0.28252075514059294,) + (np.nan,) * 5,
                   {'pre-filtering-failed'}, (True, False)))

    # lrt-filtering
    var = unfitted()
    k = load_k()
    results = fit_single(var, k, False, [], cov, False, 1, 0.05)
    check(results,
          make_lmm('pattern',
                   (0.28252075514059294, 0.2920532220978148) +
                   (np.nan,) * 4,
                   {'lrt-filtering-failed'}, (False, True)))

    # lineage fit
    var = unfitted()
    k = load_k()
    m = np.loadtxt(M)[:p.shape[0]]
    results = fit_single(var, k, True, m, cov, False, 1, 1)
    check(results,
          make_lmm('pattern',
                   (0.28252075514059294,) + fitted_core + (0,),
                   set(), (False, False)))

    # lineage fit + covariates
    var = unfitted()
    k = load_k()
    m = np.loadtxt(M)[:p.shape[0]]
    # from here on cov is the raw covariate array, also used by the
    # continuous phenotype scenario below
    cov = pd.read_csv(COV, index_col=0, header=None, sep='\t').values
    results = fit_single(var, k, True, m, cov, False, 1, 1)
    check(results,
          make_lmm('pattern',
                   (0.28252075514059294,) + fitted_core + (0,),
                   set(), (False, False)))

    # continuous phenotype
    var = unfitted()
    k = load_k()
    results = fit_single(var, k, False, [], cov, True, 1, 1)
    check(results,
          make_lmm('pattern',
                   (0.0,) + fitted_core + (None,),
                   set(), (False, False)))