Ejemplo n.º 1
0
 def test_fit_null_binary(self):
     """Check `fit_null` against reference values for a binary phenotype.

     Four fits are run on the fixture data: with and without covariates,
     each with `firth=False` (the fitted `params` are compared) and with
     `firth=True` (the return value itself is compared against a scalar).
     A final, perfectly separable dataset is expected to yield `None`.
     """
     p = np.loadtxt(P_BINARY)
     m = np.loadtxt(M)
     cov = pd.DataFrame([])
     # no covariates
     params = np.array([
         -1.41572498, 0.35847998, -0.03014792, 2.46252819, 0.96908425,
         -0.20952455, -0.27988125, 0.36798503, -0.03278285, -1.34132024,
         0.844149
     ])
     null_res = fit_null(p, m, cov, False, firth=False)
     # Compare the largest *absolute* deviation: taking abs() after max()
     # would let arbitrarily large negative differences go unnoticed.
     self.assertTrue(abs(params - null_res.params).max() < 1E-7)
     # no covariates, firth regression
     null_res = fit_null(p, m, cov, False, firth=True)
     self.assertAlmostEqual(null_res, -57.884527394557985)
     # covariates
     cov = np.loadtxt(COV)
     cov = pd.DataFrame(cov)
     params = np.array([
         -0.87072948, 0.26456701, 0.03485904, 2.80243184, 1.086393,
         -0.3882244, -0.46883396, 0.61387846, 0.09962477, -1.45376984,
         0.93929299, 0.07927743, -1.54631396, 0.1098796
     ])
     null_res = fit_null(p, m, cov, False, firth=False)
     self.assertTrue(abs(params - null_res.params).max() < 1E-7)
     # covariates, firth regression
     null_res = fit_null(p, m, cov, False, firth=True)
     self.assertAlmostEqual(null_res, -55.60790630835098)
     # perfectly separable data: the phenotype equals the single predictor
     p = np.array([1] * 10 + [0] * 90)
     m = np.array([1] * 10 + [0] * 90).reshape(-1, 1)
     cov = pd.DataFrame([])
     self.assertIsNone(fit_null(p, m, cov, False, False))
Ejemplo n.º 2
0
 def test_fit_null_continuous(self):
     """Check `fit_null` against reference values for a continuous phenotype.

     The fitted `params` are compared with stored reference values, first
     with an empty covariates frame and then with the covariates fixture.
     """
     p_cont = np.loadtxt(P_CONT)
     m = np.loadtxt(M)
     # no covariates
     params = np.array([0.65572473, -0.16129649, 0.03417796, -0.08011702,
                        0.10902641, 0.00599514, -0.09081684, -0.13653787,
                        0.17798003, -0.16793408, 0.12959982])
     null_res = fit_null(p_cont, m, pd.DataFrame([]), True, firth=False)
     # Compare the largest *absolute* deviation: taking abs() after max()
     # would let arbitrarily large negative differences go unnoticed.
     self.assertTrue(abs(params - null_res.params).max() < 1E-7)
     # covariates
     cov = np.loadtxt(COV)
     cov = pd.DataFrame(cov)
     params = np.array([0.49070237, -0.17284083, 0.00710691, -0.11784811,
                        0.07352861, 0.01219004, -0.04772721, -0.17089199,
                        0.18198025, -0.17141095, 0.11330439, 0.08887165,
                        0.20304982, 0.13802362])
     null_res = fit_null(p_cont, m, cov, True, firth=False)
     self.assertTrue(abs(params - null_res.params).max() < 1E-7)
Ejemplo n.º 3
0
 def test_find_enet_selected_binary(self):
     """Check the variants yielded by `find_enet_selected` on the VCF fixture.

     The generator is run twice, once without fixed effects and once with
     a `(mf, null_res, null_firth)` tuple built from `fit_null`. In both
     runs the first yielded variant and the kmer of the last one are
     checked; only the expected p-value of the first variant differs.
     """
     p = pd.read_csv(P,
                     index_col=0,
                     sep='\t')['binary']
     b = np.array([5.60000000e-01, 2.16450216e-37, -1.81966726e-37,
                   1.99034682e-38, -1.34400134e-37, -2.94000294e-38,
                   -1.90213827e-37, 8.08080808e-38,
                   1.89393939e-37])
     idx = [10, 29, 39, 95, 110, 153, 156, 164]
     # Expected sample partition for the first variant (same in both runs).
     kstrains = [
         'sample_10', 'sample_11', 'sample_13', 'sample_18',
         'sample_19',
         'sample_20', 'sample_23', 'sample_25', 'sample_26',
         'sample_31', 'sample_34', 'sample_36', 'sample_40',
         'sample_45'
     ]
     nkstrains = [
         'sample_1', 'sample_12', 'sample_14', 'sample_15',
         'sample_16', 'sample_17', 'sample_2', 'sample_21',
         'sample_22', 'sample_24', 'sample_27', 'sample_28',
         'sample_29', 'sample_3', 'sample_30', 'sample_32',
         'sample_33', 'sample_35', 'sample_37', 'sample_38',
         'sample_39', 'sample_4', 'sample_41', 'sample_42',
         'sample_43', 'sample_44', 'sample_46', 'sample_47',
         'sample_48', 'sample_49', 'sample_5', 'sample_50',
         'sample_6', 'sample_7', 'sample_8', 'sample_9'
     ]

     def check_first_variant(v):
         # Assertions shared by both runs (everything except the p-value).
         self.assertEqual(v.kmer, 'FM211187_83_G_A')
         self.assertEqual(v.af, 0.28)
         self.assertTrue(abs(v.prep - 0.17050715825327736) < 1E-7)
         # NOTE(review): the expected value is ~1e-37, so a 1E-7 absolute
         # tolerance only checks that kbeta is close to zero.
         self.assertTrue(abs(v.kbeta - 2.164502164502162e-37) < 1E-7)
         self.assertIsNone(v.max_lineage)
         self.assertEqual(v.kstrains, kstrains)
         self.assertEqual(v.nkstrains, nkstrains)
         self.assertEqual(len(v.notes), 0)

     # without fixed effects
     # The VCF handle is closed once the generator has been drained
     # (assumes VariantFile is pysam's, which is a context manager).
     with VariantFile(VCF) as vcf:
         g = find_enet_selected(b, idx, p, np.array([[]]), 'vcf',
                                None, False, None,
                                vcf, set(p.index),
                                None, False, False, None, False)
         v = next(g)
         check_first_variant(v)
         self.assertTrue(np.isnan(v.pvalue))
         # read to exhaustion
         for v in g:
             pass
         self.assertEqual(v.kmer, 'FM211187_3592_G_A')

     # with fixed effects
     pf = np.loadtxt(PFIRTH)
     mf = np.loadtxt(MFIRTH)
     cov = pd.DataFrame([])
     null_res = fit_null(pf, mf, cov, False, firth=False).llr
     null_firth = fit_null(pf, mf, cov, False, firth=True)
     with VariantFile(VCF) as vcf:
         g = find_enet_selected(b, idx, p, np.array([[]]), 'vcf',
                                (mf, null_res, null_firth), False,
                                None, vcf, set(p.index),
                                None, False, False, None, False)
         v = next(g)
         check_first_variant(v)
         self.assertEqual(v.pvalue, 1)
         # read to exhaustion
         for v in g:
             pass
         self.assertEqual(v.kmer, 'FM211187_3592_G_A')