def test_process_model_namedtuple():
    # check that _process_model passes a namedtuple's params and info through unchanged
    df = pd.DataFrame(columns=["value", "pvalue", "ci_lower", "ci_upper"])
    df["value"] = np.arange(10)
    df["pvalue"] = np.arange(10)
    df["ci_lower"] = np.arange(10)
    df["ci_upper"] = np.arange(10)
    info = {"stat1": 0, "stat2": 0}
    model = NamedTup(params=df, info=info)
    res = _process_model(model)
    afe(res.params, df)
    ase(pd.Series(res.info), pd.Series(info))


def test_process_model_dict():
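    # check that _process_model also accepts a plain dict with "params" and
    # "info" entries and passes both through unchanged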
    df = pd.DataFrame(columns=["value", "pvalue", "standard_error"])
    df["value"] = np.arange(10)
    df["pvalue"] = np.arange(10)
    df["standard_error"] = np.arange(10)
    info = {"stat1": 0, "stat2": 0}
    mod = {}
    mod["params"] = df
    mod["info"] = info
    res = _process_model(mod)
    afe(res.params, mod["params"])
    ase(pd.Series(res.info), pd.Series(mod["info"]))


def test_convert_frame_to_string_series_without_inference():
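    # only p_value is kept numeric (the values are pre-formatted strings) so
    # that stars can be attached according to the significance thresholds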
    df = pd.DataFrame(
        np.array([[0.6, 2.3, 3.3], [0.11, 0.049, 0.009]]).T,
        columns=["value", "p_value"],
        index=["a", "b", "c"],
    ).astype("str")
    df["p_value"] = df["p_value"].astype("float")
    significance_levels = [0.1, 0.05, 0.01]
    show_stars = True
    res = _convert_frame_to_string_series(df, significance_levels, show_stars)
    exp = pd.Series(["0.6$^{ }$", "2.3$^{** }$", "3.3$^{*** }$"],
                    index=["a", "b", "c"],
                    name="")
    ase(exp, res)


def test_create_statistics_sr():
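    # the footer statistics come from the model's info dict via the
    # stats_dict mapping (display label -> info key); "show_dof" is an option
    # passed inside stats_dict and does not produce a row of its own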
    df = pd.DataFrame(np.empty((10, 3)), columns=["a", "b", "c"])
    df.index = pd.MultiIndex.from_arrays(np.array([np.arange(10), np.arange(10)]))
    info_dict = {"rsquared": 0.45, "n_obs": 400}
    sig_dig = 2
    sig_levels = [0.1, 0.2]
    show_stars = False
    model = NamedTup(params=df, info=info_dict)
    stats_dict = {"Observations": "n_obs", "R2": "rsquared", "show_dof": False}
    res = _create_statistics_sr(model, stats_dict, sig_levels, show_stars, sig_dig)
    exp = pd.Series([str(400), str(0.45)])
    exp.index = pd.MultiIndex.from_arrays(
        np.array([np.array(["Observations", "R2"]), np.array(["", ""])])
    )
    ase(exp, res)


def test_convert_model_to_series_no_inference():
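    # si_lev / si_dig are the significance levels and significant digits; the
    # remaining flags appear to toggle showing inference (si), confidence
    # intervals (ci) and stars (ss), so only starred point estimates are
    # expected here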
    df = pd.DataFrame(
        np.array([[0.6, 2.3, 3.3], [0.11, 0.049, 0.009], [0.6, 2.3, 3.3]]).T,
        columns=["value", "pvalue", "standard_error"],
        index=["a", "b", "c"],
    )
    si_lev = [0.1, 0.05, 0.01]
    si_dig = 2
    ci = False
    si = False
    ss = True
    res = _convert_model_to_series(df, si_lev, si_dig, si, ci, ss)
    exp = pd.Series(
        ["0.6$^{ }$", "2.3$^{** }$", "3.3$^{*** }$"], index=["a", "b", "c"], name=""
    )
    ase(exp, res)


def test_estimation_table():
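    # end-to-end check of estimation_table with return_type "python": the
    # body and footer DataFrames and the notes strings are compared against
    # hard-coded expectations (est is assumed to be a fitted model defined at
    # module level)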
    models = [est]
    return_type = "python"
    res = estimation_table(models, return_type, append_notes=False)
    exp = {}
    body_str = """
        index,{(1)}
        const,152.13$^{*** }$
        ,(2.85)
        Age,37.24$^{ }$
        ,(64.12)
        Sex,-106.58$^{* }$
        ,(62.13)
        BMI,787.18$^{*** }$
        ,(65.42)
        ABP,416.67$^{*** }$
        ,(69.49)
    """
    exp["body_df"] = _read_csv_string(body_str).fillna("")
    exp["body_df"].set_index("index", inplace=True)
    footer_str = """
         ,{(1)}
        Observations,442.0
        R$^2$,0.4
        Adj. R$^2$,0.39
        Residual Std. Error,59.98
        F Statistic,72.91$^{***}$
    """
    exp["footer_df"] = _read_csv_string(footer_str).fillna("")
    exp["footer_df"].set_index(" ", inplace=True)
    exp["footer_df"].index.names = [None]
    exp["footer_df"].index = pd.MultiIndex.from_arrays([exp["footer_df"].index])
    exp["notes_tex"] = "\\midrule\n"
    exp[
        "notes_html"
    ] = """<tr><td colspan="2" style="border-bottom: 1px solid black">
        </td></tr>"""

    afe(exp["footer_df"], res["footer_df"])
    afe(exp["body_df"], res["body_df"], check_index_type=False)
    ase(pd.Series(exp["notes_html"]), pd.Series(res["notes_html"]))
    ase(pd.Series(exp["notes_tex"]), pd.Series(res["notes_tex"]))


def test_null_db_read_windowcalc(null_db):
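    # read_windowcalc tallies rows per (pop, n_region_ind_snps, s_star);
    # rows with s_star == 0 are skipped and a second read adds to the
    # existing counts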
    infile = StringIO(
        'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
        'n_region_ind_snps\tind_id\tpop\ts_star\n' +
        '1 0 50000 333 4 161 msp_110 EUR 25211\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_111 EUR 25211\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_112 EUR 25212\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_113 EUR 25212\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_114 EUR 25212\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_115 EUR 25212\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_116 EUR 25212\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_116 ASN 25212\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_116 EUR 0\n'.replace(' ', '\t') +  # s_star == 0: skipped
        '')
    null_db.read_windowcalc(infile)

    index = pd.MultiIndex.from_tuples(
        [
            ('ASN', 162, 25212),
            ('EUR', 161, 25211),
            ('EUR', 162, 25211),
            ('EUR', 162, 25212),
        ],
        names=['pop', 'n_region_ind_snps', 's_star'])
    ase(null_db.DB, pd.Series([1, 1, 1, 5], index=index))

    infile = StringIO(
        'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
        'n_region_ind_snps\tind_id\tpop\ts_star\n' +
        '1 0 50000 333 4 161 msp_110 EUR 25211\n'.replace(' ', '\t') +
        '1 0 50000 333 4 162 msp_111 EUR 25211\n'.replace(' ', '\t') + '')
    null_db.read_windowcalc(infile)

    index = pd.MultiIndex.from_tuples(
        [
            ('ASN', 162, 25212),
            ('EUR', 161, 25211),
            ('EUR', 162, 25211),
            ('EUR', 162, 25212),
        ],
        names=['pop', 'n_region_ind_snps', 's_star'])
    ase(null_db.DB, pd.Series([1, 2, 2, 5], index=index))


def test_process_model_stats_model():
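    # est is assumed to be a fitted statsmodels-style results object defined
    # at module level; _process_model must reproduce its params table and the
    # summary statistics collected in info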
    params = pd.DataFrame(
        columns=["value", "p_value", "standard_error", "ci_lower", "ci_upper"],
        index=["const", "Age", "Sex", "BMI", "ABP"],
    )
    params["value"] = [
        152.133484, 37.241211, -106.577520, 787.179313, 416.673772
    ]
    params["p_value"] = [
        2.048808e-193,
        5.616557e-01,
        8.695658e-02,
        5.345260e-29,
        4.245663e-09,
    ]
    params["standard_error"] = [
        2.852749, 64.117433, 62.125062, 65.424126, 69.494666
    ]
    params["ci_lower"] = [
        146.526671, -88.775663, -228.678572, 658.594255, 280.088446
    ]
    params["ci_upper"] = [
        157.740298, 163.258084, 15.523532, 915.764371, 553.259097
    ]
    info = {}
    info["rsquared"] = 0.40026108237714
    info["rsquared_adj"] = 0.39477148130050055
    info["fvalue"] = 72.91259907398705
    info["f_pvalue"] = 2.700722880950139e-47
    info["df_model"] = 4.0
    info["df_resid"] = 437.0
    info["resid_std_err"] = 59.97560860753488
    info["n_obs"] = 442.0
    res = _process_model(est)
    afe(res["params"], params)
    ase(pd.Series(res["info"]), pd.Series(info))
    assert res["name"] == "target"


def test_null_db_get_sstar(null_db):
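    # seed the null database directly, then exercise get_sstar: an unknown
    # population raises, an exact n_region_ind_snps match returns the stored
    # counts, out-of-range values clamp to the nearest bin, and intermediate
    # values interpolate between bins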
    index = pd.MultiIndex.from_tuples(
        [
            ('EUR', 161, 25208),
            ('EUR', 161, 25211),
            ('EUR', 165, 25210),
            ('EUR', 165, 25211),
            ('EUR', 165, 25212),
        ],
        names=['pop', 'n_region_ind_snps', 's_star'])
    null_db.DB = pd.Series([1, 1, 2, 2, 6], index=index)

    # not a valid pop
    with pytest.raises(ValueError) as e:
        null_db.get_sstar('ASN', 161)
    assert 'Population "ASN" not found in null database' in str(e)

    # exists, get values
    ase(null_db.get_sstar('EUR', 161),
        pd.Series([1, 1], index=pd.Index([25208, 25211], name='s_star')))

    # low, get min
    ase(null_db.get_sstar('EUR', 157),
        pd.Series([1, 1], index=pd.Index([25208, 25211], name='s_star')))

    # high, get max
    ase(
        null_db.get_sstar('EUR', 167),
        pd.Series([2, 2, 6],
                  index=pd.Index([25210, 25211, 25212], name='s_star')))

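    # between the stored bins (161 and 165) the expected counts follow a
    # linear interpolation per s_star value, e.g. at 162 the weights are
    # (165 - 162) / 4 = 3/4 on the 161 counts and 1/4 on the 165 counts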
    # interpolate, nearer 161
    ase(
        null_db.get_sstar('EUR', 162),
        pd.Series([3 / 4, 2 / 4, 5 / 4, 6 / 4],
                  index=pd.Index([25208, 25210, 25211, 25212], name='s_star')))

    # interpolate, halfway
    ase(
        null_db.get_sstar('EUR', 163),
        pd.Series([1 / 2, 1, 3 / 2, 3],
                  index=pd.Index([25208, 25210, 25211, 25212], name='s_star')))

    # interpolate, nearer 165
    ase(
        null_db.get_sstar('EUR', 164),
        pd.Series([1 / 4, 6 / 4, 7 / 4, 18 / 4],
                  index=pd.Index([25208, 25210, 25211, 25212], name='s_star')))


def test_null_db_init(null_db):
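    # a freshly constructed Null_DB holds an empty int64 Series indexed by
    # (pop, n_region_ind_snps, s_star)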
    ase(
        null_db.DB,
        pd.Series(dtype='int64',
                  index=pd.MultiIndex.from_tuples(
                      [], names=['pop', 'n_region_ind_snps', 's_star'])))


def test_main_build_null_db():
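    # drive the build-null-db CLI command end to end: only the chromosomes
    # listed in chroms.txt (10, 12, 16) should be read; 18.windowcalc.gz
    # contains a malformed 'NOT READ' line and must never be parsed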
    runner = CliRunner()
    with runner.isolated_filesystem():
        # build chromosome file
        with open('chroms.txt', 'w') as output:
            output.write('10\n' '12\n' '16\n')

        # make fake window files
        with gzip.open('10.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '2 0 50000 333 4 161 msp_110 EUR 25211\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_111 EUR 25211\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_112 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_113 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_114 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_115 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_116 EUR 25212\n'.replace(' ', '\t') +
                '2 0 50000 333 4 162 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')

        with gzip.open('12.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '3 0 50000 333 4 161 msp_110 ASN 25211\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_111 EUR 25211\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_112 ASN 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_113 EUR 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_114 ASN 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_115 EUR 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_116 EUR 25212\n'.replace(' ', '\t') +
                '3 0 50000 333 4 162 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')

        with gzip.open('16.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '1 0 50000 333 4 164 msp_110 EUR 25211\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_111 ASN 25211\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_112 EUR 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_113 ASN 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_114 ASN 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_115 EUR 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_116 EUR 25212\n'.replace(' ', '\t') +
                '1 0 50000 333 4 164 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')

        with gzip.open('18.windowcalc.gz', 'wt') as output:
            output.write(
                'chrom\twinstart\twinend\tn_snps\tn_ind_snps\t'
                'n_region_ind_snps\tind_id\tpop\ts_star\n' +
                '1 0 50000 333 4 161 msp_110 EUR 25211\n'.replace(' ', '\t') +
                'NOT READ\n'
                '1 0 50000 333 4 162 msp_116 ASN 25212\n'.replace(' ', '\t') +
                '')

        # build db
        result = runner.invoke(
            Sstar_ECDF.main, ' build-null-db '
            '--chr-list chroms.txt '
            '--outfile test.pkl')

        assert result.exit_code == 0

        # check db
        assert os.path.exists('test.pkl')

        db = Sstar_ECDF.Null_DB()
        db.load('test.pkl')
        index = pd.MultiIndex.from_tuples(
            [
                ('ASN', 161, 25211),
                ('ASN', 162, 25212),
                ('ASN', 164, 25211),
                ('ASN', 164, 25212),
                ('EUR', 161, 25211),
                ('EUR', 162, 25211),
                ('EUR', 162, 25212),
                ('EUR', 164, 25211),
                ('EUR', 164, 25212),
            ],
            names=['pop', 'n_region_ind_snps', 's_star'])
        ase(db.DB, pd.Series([1, 4, 1, 3, 1, 2, 8, 1, 3], index=index))


def test_main_combine_null_dbs():
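    # combine-null-dbs should sum the input databases entry by entry: the
    # three inputs share the same index and hold n, 2n and 3n, so the
    # combined database must hold 6n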
    runner = CliRunner()
    with runner.isolated_filesystem():
        # build up null dbs
        index = pd.MultiIndex.from_tuples(
            [
                ('ASN', 161, 25211),
                ('ASN', 162, 25212),
                ('ASN', 164, 25211),
                ('ASN', 164, 25212),
                ('EUR', 161, 25211),
                ('EUR', 162, 25211),
                ('EUR', 162, 25212),
                ('EUR', 164, 25211),
                ('EUR', 164, 25212),
                ('EUR', 164, 25213),
                ('EUR', 164, 25214),
                ('EUR', 164, 25215),
            ],
            names=['pop', 'n_region_ind_snps', 's_star'])
        dat = list(range(12))
        null_db = Sstar_ECDF.Null_DB()
        null_db.DB = pd.Series(dat, index=index)
        null_db.save('null.pkl')

        index = pd.MultiIndex.from_tuples(
            [
                ('ASN', 161, 25211),
                ('ASN', 162, 25212),
                ('ASN', 164, 25211),
                ('ASN', 164, 25212),
                ('EUR', 161, 25211),
                ('EUR', 162, 25211),
                ('EUR', 162, 25212),
                ('EUR', 164, 25211),
                ('EUR', 164, 25212),
                ('EUR', 164, 25213),
                ('EUR', 164, 25214),
                ('EUR', 164, 25215),
            ],
            names=['pop', 'n_region_ind_snps', 's_star'])
        dat = list(range(0, 24, 2))
        null_db = Sstar_ECDF.Null_DB()
        null_db.DB = pd.Series(dat, index=index)
        null_db.save('null2.pkl')

        index = pd.MultiIndex.from_tuples(
            [
                ('ASN', 161, 25211),
                ('ASN', 162, 25212),
                ('ASN', 164, 25211),
                ('ASN', 164, 25212),
                ('EUR', 161, 25211),
                ('EUR', 162, 25211),
                ('EUR', 162, 25212),
                ('EUR', 164, 25211),
                ('EUR', 164, 25212),
                ('EUR', 164, 25213),
                ('EUR', 164, 25214),
                ('EUR', 164, 25215),
            ],
            names=['pop', 'n_region_ind_snps', 's_star'])
        dat = list(range(0, 36, 3))
        null_db = Sstar_ECDF.Null_DB()
        null_db.DB = pd.Series(dat, index=index)
        null_db.save('null3.pkl')

        # run main
        result = runner.invoke(
            Sstar_ECDF.main, 'combine-null-dbs '
            '--outfile test.pkl '
            'null.pkl null2.pkl null3.pkl')
        assert result.exit_code == 0

        # check output files
        db = Sstar_ECDF.Null_DB()
        db.load('test.pkl')
        index = pd.MultiIndex.from_tuples(
            [
                ('ASN', 161, 25211),
                ('ASN', 162, 25212),
                ('ASN', 164, 25211),
                ('ASN', 164, 25212),
                ('EUR', 161, 25211),
                ('EUR', 162, 25211),
                ('EUR', 162, 25212),
                ('EUR', 164, 25211),
                ('EUR', 164, 25212),
                ('EUR', 164, 25213),
                ('EUR', 164, 25214),
                ('EUR', 164, 25215),
            ],
            names=['pop', 'n_region_ind_snps', 's_star'])
        ase(db.DB, pd.Series(range(0, 72, 6), index=index))