예제 #1
0
 def test_fit_lmm_block(self):
     """Fit a single-column variant block and check the reported statistics.

     Also checks that an out-of-range heritability value raises ``KeyError``
     and that a variant matrix with the wrong number of rows raises
     ``AssertionError``.
     """
     phenotypes = pd.read_csv(P_BINARY, index_col=0, sep='\t')['binary']
     no_covariates = pd.DataFrame([])
     _, lmm, h2 = initialise_lmm(phenotypes, no_covariates, S,
                                 lmm_cache_in=None,
                                 lmm_cache_out=None)

     # One variant only: trim to the phenotype length, then make it a column.
     n_samples = phenotypes.shape[0]
     variant_mat = np.loadtxt(K)[:n_samples].reshape(-1, 1)

     result = fit_lmm_block(lmm, h2, variant_mat)
     expected_first_entry = (
         ('beta', 0.15136876),
         ('bse', 0.14208536),
         ('frac_h2', 0.15198184),
         ('p_values', 0.29205322),
     )
     for key, value in expected_first_entry:
         self.assertAlmostEqual(result[key][0], value)

     # impossibly high h2
     self.assertRaises(KeyError, fit_lmm_block, lmm, 1, variant_mat)
     # shape mismatch
     self.assertRaises(AssertionError,
                       fit_lmm_block, lmm, h2, variant_mat[:10])
예제 #2
0
 def test_initialise_lmm(self):
     """Check initialise_lmm across covariate, cache and sample-set inputs.

     Scenarios, in order: no covariates; covariates read from ``COV``;
     phenotype sample names that match nothing; writing the LMM cache to
     ``C`` and reading it back; a 10-sample phenotype combined with that
     cache (expected to exit with code 1); and ``lineage_samples`` that do
     and do not match.
     """
     pheno = pd.read_csv(P_BINARY,
                         index_col=0,
                         sep='\t')['binary']
     cov = pd.DataFrame([])
     x, lmm, h2 = initialise_lmm(pheno, cov, S,
                                 lmm_cache_in=None,
                                 lmm_cache_out=None)
     self.assertEqual(x.shape[0], 50)
     self.assertAlmostEqual(lmm.findH2()['nLL'][0],
                            35.7033778)
     self.assertAlmostEqual(h2, 0.0)
     # covariates
     cov = pd.read_csv(COV, index_col=0,
                       header=None,
                       sep='\t')
     x, lmm, h2 = initialise_lmm(pheno, cov, S,
                                 lmm_cache_in=None,
                                 lmm_cache_out=None)
     self.assertEqual(x.shape[0], 50)
     self.assertAlmostEqual(lmm.findH2()['nLL'][0],
                            35.7033778)
     self.assertAlmostEqual(h2, 0.0)
     # sample names not matching
     unmatched = pd.Series(np.random.random(100),
                           index=['test_%d' % i for i in range(100)])
     # Warnings are silenced for the whole scenario, including findH2().
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         x, lmm, h2 = initialise_lmm(unmatched, cov, S,
                                     lmm_cache_in=None,
                                     lmm_cache_out=None)
         self.assertEqual(x.shape[0], 0)
         self.assertTrue(not np.isfinite(lmm.findH2()['nLL'][0]))
         self.assertAlmostEqual(h2, 0.0)
     # save cache
     initialise_lmm(pheno, cov, S,
                    lmm_cache_in=None,
                    lmm_cache_out=C)
     # load cache
     x, lmm, h2 = initialise_lmm(pheno, cov, S,
                                 lmm_cache_in=C,
                                 lmm_cache_out=None)
     self.assertEqual(x.shape[0], 50)
     self.assertAlmostEqual(lmm.findH2()['nLL'][0],
                            35.7033778)
     self.assertAlmostEqual(h2, 0.0)
     # different sizes
     too_small = pd.Series(np.random.random(10),
                           index=['test_%d' % i for i in range(10)])
     with self.assertRaises(SystemExit) as cm:
         initialise_lmm(too_small, cov, S,
                        lmm_cache_in=C,
                        lmm_cache_out=None)
     # The exit-code check must come after the ``with`` block: statements
     # placed after the raising call inside the block never execute.
     self.assertEqual(cm.exception.code, 1)
     # matching lineage samples
     cov = pd.DataFrame([])
     kinship = pd.read_csv(S, index_col=0,
                           sep='\t')
     initialise_lmm(pheno, cov, S,
                    lmm_cache_in=None,
                    lmm_cache_out=None,
                    lineage_samples=kinship.index)
     # non-matching lineage samples
     with self.assertRaises(SystemExit):
         initialise_lmm(pheno, cov, S,
                        lmm_cache_in=None,
                        lmm_cache_out=None,
                        lineage_samples=kinship.index[:-1])
예제 #3
0
 def test_fit_lmm(self):
     """Run fit_lmm on one variant under each filtering and fit option.

     Every scenario builds a fresh, unfitted ``LMM`` record (six NaN
     statistics, empty notes, both trailing flags ``True``), fits it, and
     compares each returned record with the expected one through
     ``eq_lmm``, which must report an empty set.
     """
     pheno = pd.read_csv(P_BINARY, index_col=0, sep='\t')['binary']
     n_samples = pheno.shape[0]
     covariates = pd.DataFrame([])
     _, lmm, h2 = initialise_lmm(pheno, covariates, S,
                                 lmm_cache_in=None,
                                 lmm_cache_out=None)
     nan = np.nan

     def load_kmer():
         # Re-read the variant vector so the scenario gets its own array.
         return np.loadtxt(K)[:n_samples]

     def build(pattern, stats, notes, flag_a, flag_b):
         # Assemble an LMM record; the strain-name lists are rebuilt on
         # every call so no two records share them.
         with_variant = ['k%d' % i
                         for i in range(pheno[pheno == 1].shape[0])]
         without_variant = ['nk%d' % i
                            for i in range(pheno[pheno == 0].shape[0])]
         fields = ['variant', pattern, 0.2]
         fields.extend(stats)
         fields.extend([with_variant, without_variant,
                        notes, flag_a, flag_b])
         return LMM(*fields)

     def check(kmer, pattern, fit_args, stats, notes, flag_a, flag_b):
         # Fit one unfitted variant and compare against the expected record.
         unfitted = build(pattern, (nan,) * 6, set(), True, True)
         fitted = fit_lmm(lmm, h2,
                          [(unfitted, pheno.values, kmer)],
                          kmer.reshape(-1, 1),
                          *fit_args)
         wanted = [build(pattern, stats, notes, flag_a, flag_b)]
         for got, want in zip(fitted, wanted):
             self.assertEqual(eq_lmm(got, want), set())

     # Presumably the pre-filter p-value: it is the only statistic still
     # populated in the 'pre-filtering-failed' scenario.
     prefilter_p = 0.28252075514059294
     # Same p-value / beta / bse / frac_h2 as asserted for fit_lmm_block.
     lrt_fit = (0.2920532220978148,
                0.1513687600644123,
                0.1420853593711293,
                0.1519818397711344)

     # plain fit
     kmer = load_kmer()
     check(kmer, 'pattern',
           (False, [], covariates, False, 1, 1),
           (prefilter_p,) + lrt_fit + (None,),
           set(), False, False)

     # af filtering (same variant array as the plain fit)
     check(kmer, None,
           (False, [], covariates, False, 1, 1),
           (nan,) * 6,
           set(['af-filter']), True, False)

     # bad-chisq
     skewed = np.array([1]*5 + [0]*(n_samples - 5))
     check(skewed, 'pattern',
           (False, [], covariates, False, 1, 1),
           (0.2544505826463333,
            0.263519965703956,
            0.2666666666666663,
            0.2357022603955158,
            0.16116459280507586,
            None),
           set(['bad-chisq']), False, False)

     # pre-filtering
     check(load_kmer(), 'pattern',
           (False, [], covariates, False, 0.05, 1),
           (prefilter_p,) + (nan,) * 5,
           set(['pre-filtering-failed']), True, False)

     # lrt-filtering
     check(load_kmer(), 'pattern',
           (False, [], covariates, False, 1, 0.05),
           (prefilter_p, lrt_fit[0]) + (nan,) * 4,
           set(['lrt-filtering-failed']), False, True)

     # lineage fit
     kmer = load_kmer()
     lineages = np.loadtxt(M)[:n_samples]
     check(kmer, 'pattern',
           (True, lineages, covariates, False, 1, 1),
           (prefilter_p,) + lrt_fit + (0,),
           set(), False, False)

     # lineage fit + covariates
     kmer = load_kmer()
     lineages = np.loadtxt(M)[:n_samples]
     covariates = pd.read_csv(COV, index_col=0, header=None,
                              sep='\t').values
     check(kmer, 'pattern',
           (True, lineages, covariates, False, 1, 1),
           (prefilter_p,) + lrt_fit + (0,),
           set(), False, False)

     # continuous phenotype (still uses the covariate values loaded above)
     check(load_kmer(), 'pattern',
           (False, [], covariates, True, 1, 1),
           (0.0,) + lrt_fit + (None,),
           set(), False, False)