def test_process_model_namedtuple():
    """_process_model must pass a namedtuple model's params and info through unchanged."""
    stat_cols = ["value", "pvalue", "ci_lower", "ci_upper"]
    # Every statistic column simply holds 0..9.
    params = pd.DataFrame({col: np.arange(10) for col in stat_cols}, columns=stat_cols)
    stats = {"stat1": 0, "stat2": 0}
    processed = _process_model(NamedTup(params=params, info=stats))
    afe(processed.params, params)
    ase(pd.Series(processed.info), pd.Series(stats))
def test_process_model_dict():
    """A plain dict with 'params' and 'info' keys is accepted by _process_model unchanged."""
    stat_cols = ["value", "pvalue", "standard_error"]
    params = pd.DataFrame({col: np.arange(10) for col in stat_cols}, columns=stat_cols)
    model = {"params": params, "info": {"stat1": 0, "stat2": 0}}
    res = _process_model(model)
    afe(res.params, model["params"])
    ase(pd.Series(res.info), pd.Series(model["info"]))
def test_convert_model_to_series_without_inference():
    """String-typed values plus float p-values render as starred value strings."""
    raw = np.array([[0.6, 2.3, 3.3], [0.11, 0.049, 0.009]]).T
    df = pd.DataFrame(raw, columns=["value", "p_value"], index=["a", "b", "c"]).astype("str")
    # p-values must stay numeric so the significance thresholds can be applied
    df["p_value"] = df["p_value"].astype("float")
    res = _convert_frame_to_string_series(df, [0.1, 0.05, 0.01], True)
    expected = pd.Series(
        ["0.6$^{ }$", "2.3$^{** }$", "3.3$^{*** }$"],
        index=["a", "b", "c"],
        name="",
    )
    ase(expected, res)
def test_create_statistics_sr():
    """Summary statistics are rendered as a string Series with a two-level index."""
    params = pd.DataFrame(np.empty((10, 3)), columns=["a", "b", "c"])
    params.index = pd.MultiIndex.from_arrays(np.array([np.arange(10), np.arange(10)]))
    model = NamedTup(params=params, info={"rsquared": 0.45, "n_obs": 400})
    # maps display labels to info keys; show_dof suppresses degrees of freedom
    stats_definitions = {"Observations": "n_obs", "R2": "rsquared", "show_dof": False}
    res = _create_statistics_sr(
        model,
        stats_definitions,
        [0.1, 0.2],  # significance levels
        False,  # show_stars
        2,  # significant digits
    )
    expected = pd.Series([str(400), str(0.45)])
    expected.index = pd.MultiIndex.from_arrays(
        np.array([np.array(["Observations", "R2"]), np.array(["", ""])])
    )
    ase(expected, res)
def test_convert_model_to_series_no_inference():
    """With inference and CI display off, only starred value strings are returned."""
    data = np.array(
        [[0.6, 2.3, 3.3], [0.11, 0.049, 0.009], [0.6, 2.3, 3.3]]
    ).T
    params = pd.DataFrame(
        data,
        columns=["value", "pvalue", "standard_error"],
        index=["a", "b", "c"],
    )
    res = _convert_model_to_series(
        params,
        [0.1, 0.05, 0.01],  # significance levels
        2,  # significant digits
        False,  # si — presumably show inference; confirm against signature
        False,  # ci — presumably show confidence intervals
        True,  # ss — show stars
    )
    expected = pd.Series(
        ["0.6$^{ }$", "2.3$^{** }$", "3.3$^{*** }$"],
        index=["a", "b", "c"],
        name="",
    )
    ase(expected, res)
def test_estimation_table():
    """End-to-end check of estimation_table with return_type='python'.

    Compares the body frame, footer frame and the tex/html note fragments
    produced for the module-level fitted model ``est``.
    """
    models = [est]
    return_type = "python"
    res = estimation_table(models, return_type, append_notes=False)
    exp = {}
    # Expected coefficient body: value row followed by a (standard error) row
    # per regressor.
    # NOTE(review): the exact line breaks/indentation inside these CSV strings
    # were lost in formatting; _read_csv_string's whitespace handling should
    # tolerate this layout — confirm against the helper.
    body_str = """
index,{(1)}
const,152.13$^{*** }$
,(2.85)
Age,37.24$^{ }$
,(64.12)
Sex,-106.58$^{* }$
,(62.13)
BMI,787.18$^{*** }$
,(65.42)
ABP,416.67$^{*** }$
,(69.49)
"""
    exp["body_df"] = _read_csv_string(body_str).fillna("")
    exp["body_df"].set_index("index", inplace=True)
    # Expected summary statistics footer; the index column is named " "
    # (a single space) after parsing.
    footer_str = """
 ,{(1)}
Observations,442.0
R$^2$,0.4
Adj. R$^2$,0.39
Residual Std. Error,59.98
F Statistic,72.91$^{***}$
"""
    exp["footer_df"] = _read_csv_string(footer_str).fillna("")
    exp["footer_df"].set_index(" ", inplace=True)
    exp["footer_df"].index.names = [None]
    # footer index is wrapped in a one-level MultiIndex to match the output
    exp["footer_df"].index = pd.MultiIndex.from_arrays([exp["footer_df"].index])
    exp["notes_tex"] = "\\midrule\n"
    exp[
        "notes_html"
    ] = """<tr><td colspan="2" style="border-bottom: 1px solid black"> </td></tr>"""
    afe(exp["footer_df"], res["footer_df"])
    afe(exp["body_df"], res["body_df"], check_index_type=False)
    ase(pd.Series(exp["notes_html"]), pd.Series(res["notes_html"]))
    ase(pd.Series(exp["notes_tex"]), pd.Series(res["notes_tex"]))
def test_null_db_read_windowcalc(null_db):
    """read_windowcalc tallies rows per (pop, n_region_ind_snps, s_star).

    Rows with s_star == 0 are skipped, and a second read adds to the counts
    already accumulated in the database.
    """
    header = ('chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
              'n_region_ind_snps\tind_id\tpop\ts_star\n')

    def row(text):
        # windowcalc files are tab separated
        return text.replace(' ', '\t')

    infile = StringIO(
        header
        + row('1 0 50000 333 4 161 msp_110 EUR 25211\n')
        + row('1 0 50000 333 4 162 msp_111 EUR 25211\n')
        + row('1 0 50000 333 4 162 msp_112 EUR 25212\n')
        + row('1 0 50000 333 4 162 msp_113 EUR 25212\n')
        + row('1 0 50000 333 4 162 msp_114 EUR 25212\n')
        + row('1 0 50000 333 4 162 msp_115 EUR 25212\n')
        + row('1 0 50000 333 4 162 msp_116 EUR 25212\n')
        + row('1 0 50000 333 4 162 msp_116 ASN 25212\n')
        + row('1 0 50000 333 4 162 msp_116 EUR 0\n')  # skipped: s_star == 0
    )
    null_db.read_windowcalc(infile)
    expected_index = pd.MultiIndex.from_tuples(
        [
            ('ASN', 162, 25212),
            ('EUR', 161, 25211),
            ('EUR', 162, 25211),
            ('EUR', 162, 25212),
        ],
        names=['pop', 'n_region_ind_snps', 's_star'])
    ase(null_db.DB, pd.Series([1, 1, 1, 5], index=expected_index))

    # a second file increments the counts of the matching entries
    infile = StringIO(
        header
        + row('1 0 50000 333 4 161 msp_110 EUR 25211\n')
        + row('1 0 50000 333 4 162 msp_111 EUR 25211\n')
    )
    null_db.read_windowcalc(infile)
    ase(null_db.DB, pd.Series([1, 2, 2, 5], index=expected_index))
def test_process_model_stats_model():
    """A fitted statsmodels result is unpacked into params, info and name."""
    expected_params = pd.DataFrame(
        {
            "value": [
                152.133484, 37.241211, -106.577520, 787.179313, 416.673772,
            ],
            "p_value": [
                2.048808e-193,
                5.616557e-01,
                8.695658e-02,
                5.345260e-29,
                4.245663e-09,
            ],
            "standard_error": [
                2.852749, 64.117433, 62.125062, 65.424126, 69.494666,
            ],
            "ci_lower": [
                146.526671, -88.775663, -228.678572, 658.594255, 280.088446,
            ],
            "ci_upper": [
                157.740298, 163.258084, 15.523532, 915.764371, 553.259097,
            ],
        },
        index=["const", "Age", "Sex", "BMI", "ABP"],
    )
    # fit statistics reported alongside the coefficients
    expected_info = {
        "rsquared": 0.40026108237714,
        "rsquared_adj": 0.39477148130050055,
        "fvalue": 72.91259907398705,
        "f_pvalue": 2.700722880950139e-47,
        "df_model": 4.0,
        "df_resid": 437.0,
        "resid_std_err": 59.97560860753488,
        "n_obs": 442.0,
    }
    res = _process_model(est)
    afe(res["params"], expected_params)
    ase(pd.Series(res["info"]), pd.Series(expected_info))
    assert res["name"] == "target"
def test_null_db_get_sstar(null_db):
    """get_sstar validates the population, clamps out-of-range snp counts
    to the min/max recorded value, and linearly interpolates between the
    two nearest n_region_ind_snps entries otherwise.
    """
    index = pd.MultiIndex.from_tuples(
        [
            ('EUR', 161, 25208),
            ('EUR', 161, 25211),
            ('EUR', 165, 25210),
            ('EUR', 165, 25211),
            ('EUR', 165, 25212),
        ],
        names=['pop', 'n_region_ind_snps', 's_star'])
    null_db.DB = pd.Series([1, 1, 2, 2, 6], index=index)
    # not a valid pop
    with pytest.raises(ValueError) as e:
        null_db.get_sstar('ASN', 161)
    # check the exception message itself, not the ExceptionInfo repr
    assert 'Population "ASN" not found in null database' in str(e.value)
    # exists, get values
    ase(null_db.get_sstar('EUR', 161),
        pd.Series([1, 1], index=pd.Index([25208, 25211], name='s_star')))
    # low, get min
    ase(null_db.get_sstar('EUR', 157),
        pd.Series([1, 1], index=pd.Index([25208, 25211], name='s_star')))
    # high, get max
    ase(
        null_db.get_sstar('EUR', 167),
        pd.Series([2, 2, 6],
                  index=pd.Index([25210, 25211, 25212], name='s_star')))
    # interpolate, nearer 161
    ase(
        null_db.get_sstar('EUR', 162),
        pd.Series([3 / 4, 2 / 4, 5 / 4, 6 / 4],
                  index=pd.Index([25208, 25210, 25211, 25212],
                                 name='s_star')))
    # interpolate, halfway
    ase(
        null_db.get_sstar('EUR', 163),
        pd.Series([1 / 2, 1, 3 / 2, 3],
                  index=pd.Index([25208, 25210, 25211, 25212],
                                 name='s_star')))
    # interpolate, nearer 165
    ase(
        null_db.get_sstar('EUR', 164),
        pd.Series([1 / 4, 6 / 4, 7 / 4, 18 / 4],
                  index=pd.Index([25208, 25210, 25211, 25212],
                                 name='s_star')))
def test_null_db_init(null_db):
    """A fresh null database is an empty int64 Series with the 3-level index."""
    empty_index = pd.MultiIndex.from_tuples(
        [], names=['pop', 'n_region_ind_snps', 's_star'])
    ase(null_db.DB, pd.Series(dtype='int64', index=empty_index))
def test_main_build_null_db():
    """build-null-db reads only the windowcalc files listed in --chr-list
    (10, 12, 16 — the 18.windowcalc.gz file must be ignored) and pickles
    the accumulated counts.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        # build chromosome file
        with open('chroms.txt', 'w') as output:
            output.write('10\n'
                         '12\n'
                         '16\n')
        # make fake window files
        with gzip.open('10.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '2 0 50000 333 4 161 msp_110 EUR 25211\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_111 EUR 25211\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_112 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_113 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_114 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_115 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_116 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')
        with gzip.open('12.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '3 0 50000 333 4 161 msp_110 ASN 25211\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_111 EUR 25211\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_112 ASN 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_113 EUR 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_114 ASN 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_115 EUR 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_116 EUR 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')
        with gzip.open('16.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '1 0 50000 333 4 164 msp_110 EUR 25211\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_111 ASN 25211\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_112 EUR 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_113 ASN 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_114 ASN 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_115 EUR 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_116 EUR 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')
        # this chromosome is not in chroms.txt and must not be read
        with gzip.open('18.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '1 0 50000 333 4 161 msp_110 EUR 25211\n'.replace(' ', '\t') +
                'NOT READ\n'
                '1 0 50000 333 4 162 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')
        # build db
        result = runner.invoke(
            Sstar_ECDF.main,
            ' build-null-db '
            '--chr-list chroms.txt '
            '--outfile test.pkl')
        assert result.exit_code == 0
        # check db
        assert os.path.exists('test.pkl')
        db = Sstar_ECDF.Null_DB()
        db.load('test.pkl')
        # counts aggregated over chromosomes 10, 12 and 16 only
        index = pd.MultiIndex.from_tuples(
            [
                ('ASN', 161, 25211),
                ('ASN', 162, 25212),
                ('ASN', 164, 25211),
                ('ASN', 164, 25212),
                ('EUR', 161, 25211),
                ('EUR', 162, 25211),
                ('EUR', 162, 25212),
                ('EUR', 164, 25211),
                ('EUR', 164, 25212),
            ],
            names=['pop', 'n_region_ind_snps', 's_star'])
        ase(db.DB, pd.Series([1, 4, 1, 3, 1, 2, 8, 1, 3], index=index))
def test_main_combine_null_dbs():
    """combine-null-dbs sums the counts of several pickled null databases."""
    runner = CliRunner()
    with runner.isolated_filesystem():
        # all three input DBs (and the combined result) share this index
        shared_index = pd.MultiIndex.from_tuples(
            [
                ('ASN', 161, 25211),
                ('ASN', 162, 25212),
                ('ASN', 164, 25211),
                ('ASN', 164, 25212),
                ('EUR', 161, 25211),
                ('EUR', 162, 25211),
                ('EUR', 162, 25212),
                ('EUR', 164, 25211),
                ('EUR', 164, 25212),
                ('EUR', 164, 25213),
                ('EUR', 164, 25214),
                ('EUR', 164, 25215),
            ],
            names=['pop', 'n_region_ind_snps', 's_star'])
        # build up null dbs: counts 0..11 scaled by 1, 2 and 3 respectively
        for scale, filename in ((1, 'null.pkl'),
                                (2, 'null2.pkl'),
                                (3, 'null3.pkl')):
            null_db = Sstar_ECDF.Null_DB()
            null_db.DB = pd.Series(range(0, 12 * scale, scale),
                                   index=shared_index)
            null_db.save(filename)
        # run main
        result = runner.invoke(
            Sstar_ECDF.main,
            'combine-null-dbs '
            '--outfile test.pkl '
            'null.pkl null2.pkl null3.pkl')
        assert result.exit_code == 0
        # check output files: each entry is the elementwise sum, i.e. 6 * i
        db = Sstar_ECDF.Null_DB()
        db.load('test.pkl')
        ase(db.DB, pd.Series(range(0, 72, 6), index=shared_index))