Example #1
0
def test_remove_empty():
    """Check ``remove_empty`` on three COSMIC-style frames.

    * A frame holding only the accession/coordinate columns must raise
      ``ValueError`` mentioning 'No matches to write'.
    * A frame in which every row has a non-zero entry in ``test`` or
      ``test2`` must come back unchanged.
    * Rows whose ``test`` and ``test2`` entries are both 0 must be dropped
      (only the values are compared, not the index).
    """
    cosmic = pd.read_csv(StringIO('Accession Number\tchrom\tstart\tend\n'
                                  'ENST297338\t8\t117869609\t117869609\n'
                                  'ENST440973\t6\t152129237\t152129237\n'
                                  'ENST440974\t6\t152129237\t152129238\n'
                                  'ENST440975\t6\t152129236\t152129238\n'),
                         sep='\t')
    with pytest.raises(ValueError) as e:
        remove_empty(cosmic)
    # Look at the raised exception itself (``e.value``); ``str()`` of the
    # ExceptionInfo wrapper is not the exception message.
    assert 'No matches to write' in str(e.value)

    cosmic = pd.read_csv(StringIO(
        'Accession Number\tchrom\tstart\tend\ttest\ttest2\n'
        'ENST297338\t8\t117869609\t117869609\t1\t1\n'
        'ENST440973\t6\t152129237\t152129237\t0\t1\n'
        'ENST440974\t6\t152129237\t152129238\t0\t2\n'
        'ENST440975\t6\t152129236\t152129238\t0\t3\n'),
                         sep='\t')
    afe(cosmic, remove_empty(cosmic))

    cosmic = pd.read_csv(StringIO(
        'Accession Number\tchrom\tstart\tend\ttest\ttest2\n'
        'ENST297338\t8\t117869609\t117869609\t1\t1\n'
        'ENST440973\t6\t152129237\t152129237\t0\t0\n'
        'ENST440974\t6\t152129237\t152129238\t0\t2\n'
        'ENST440975\t6\t152129236\t152129238\t0\t0\n'),
                         sep='\t')
    expected = pd.read_csv(StringIO(
        'Accession Number\tchrom\tstart\tend\ttest\ttest2\n'
        'ENST297338\t8\t117869609\t117869609\t1\t1\n'
        'ENST440974\t6\t152129237\t152129238\t0\t2\n'),
                           sep='\t')
    aae(expected.values, remove_empty(cosmic).values)
def test_process_model_stats_model():
    """Check that ``_process_model(est)`` yields the expected params and info."""
    column_values = {
        "value": [152.133484, 37.241211, -106.577520, 787.179313, 416.673772],
        "pvalue": [
            2.048808e-193,
            5.616557e-01,
            8.695658e-02,
            5.345260e-29,
            4.245663e-09,
        ],
        "standard_error": [2.852749, 64.117433, 62.125062, 65.424126, 69.494666],
        "ci_lower": [146.526671, -88.775663, -228.678572, 658.594255, 280.088446],
        "ci_upper": [157.740298, 163.258084, 15.523532, 915.764371, 553.259097],
    }
    par_df = pd.DataFrame(
        columns=["value", "pvalue", "standard_error", "ci_lower", "ci_upper"],
        index=["const", "Age", "Sex", "BMI", "ABP"],
    )
    # Fill the pre-shaped frame column by column, in declaration order.
    for column, numbers in column_values.items():
        par_df[column] = numbers

    info_dict = {
        "rsquared": 0.40026108237714,
        "rsquared_adj": 0.39477148130050055,
        "fvalue": 72.91259907398705,
        "f_pvalue": 2.700722880950139e-47,
        "df_model": 4.0,
        "df_resid": 437.0,
        "dependent_variable": "target",
        "resid_std_err": 59.97560860753488,
        "n_obs": 442.0,
    }

    processed = _process_model(est)
    afe(processed.params, par_df)
    ase(pd.Series(processed.info), pd.Series(info_dict))
def test_process_frame_indices_index():
    """Check renaming of index levels, index entries and columns.

    A 3x3 frame of ones with a two-level index is passed through
    ``_process_frame_indices`` with custom parameter names, custom index
    names and column names; the result is compared with a frame parsed
    from CSV text.
    """
    frame = pd.DataFrame(np.ones((3, 3)), columns=["", "", ""])
    level_values = np.array(
        [["today", "today", "today"], ["var1", "var2", "var3"]])
    frame.index = pd.MultiIndex.from_arrays(level_values)
    frame.index.names = ["l1", "l2"]

    res = _process_frame_indices(
        frame,
        custom_param_names={"today": "tomorrow", "var1": "1stvar"},
        custom_index_names=["period", "variable"],
        column_names=list("abc"),
        show_col_names=True,
        show_col_groups=False,
        column_groups=None,
    )

    # Expected frame, written as CSV text.
    expected_csv = """
        period,variable,a,b,c
        tomorrow,1stvar,1,1,1
        tomorrow,var2,1,1,1
        tomorrow,var3,1,1,1
    """
    exp = _read_csv_string(expected_csv).fillna("")
    exp = exp.set_index(["period", "variable"])
    afe(res, exp, check_dtype=False)
def test_set_params_defaults_if_missing_minimal_params(minimal_params):
    """Check the default columns added to a minimal params frame.

    The expected frame is a copy of the fixture with ``lower``, ``upper``,
    ``group`` and ``name`` columns appended in that order.
    """
    expected = minimal_params.copy()
    defaults = {
        "lower": -np.inf,
        "upper": np.inf,
        "group": "All Parameters",
    }
    for column, default in defaults.items():
        expected[column] = default
    # The default name is the string form of each index entry.
    expected["name"] = list(map(str, expected.index))

    actual = tp._set_params_defaults_if_missing(minimal_params)
    afe(actual, expected)
Example #5
0
 def test_pdagg(self):
     """Compare ``logviewer.pdagg`` with an equivalent daily groupby count."""
     # Reference result built directly with pandas.
     daily = self.df.groupby([pd.Grouper(key='time', freq='D')])
     dfe = daily.agg({'duration': ['count']}).reset_index()
     dfe.columns = ['time', 'duration_count']

     groups = [{'key': 'time', 'freq': 'D'}]
     aggregations = {'duration': ['count']}
     afe(logviewer.pdagg(self.df, groups, aggregations), dfe)
def test_apply_number_format_callable():
    """Check ``_apply_number_format`` when ``number_format`` is a callable."""

    def nsf(num, n=3):
        """n-Significant Figures"""
        # Build the exponent-format template first, then apply it.
        template = "{0:.%ie}" % (n - 1)
        return template.format(num)

    unformatted = pd.DataFrame(data=[1234.2332, 0.0001])
    expected = pd.DataFrame(data=["1.23e+03", "1.00e-04"])
    formatted = _apply_number_format(df=unformatted, number_format=nsf)
    afe(expected, formatted)
def test_set_params_defaults_if_missing_partial_params(incomplete_params):
    """Check that an incomplete params frame is compared against fixed values."""
    actual = tp._set_params_defaults_if_missing(incomplete_params)

    expected_columns = {
        "value": [1, 2.5, 9],
        "upper": [3, np.inf, np.inf],
        "lower": [-np.inf, 2, 8],
        "group": ["coeff", None, None],
        "name": ["educ", "cutoff1", None],
    }
    # Add the columns one at a time to an initially empty frame.
    expected = pd.DataFrame()
    for column, entries in expected_columns.items():
        expected[column] = entries

    afe(actual, expected)
def test_process_model_dict():
    """Check that a dict model passes through ``_process_model`` unchanged."""
    column_names = ["value", "pvalue", "standard_error"]
    params = pd.DataFrame(columns=column_names)
    for column in column_names:
        params[column] = np.arange(10)

    mod = {"params": params, "info": {"stat1": 0, "stat2": 0}}
    processed = _process_model(mod)
    afe(processed.params, mod["params"])
    ase(pd.Series(processed.info), pd.Series(mod["info"]))
Example #9
0
def test_bootstrap_from_outcomes(setup, expected):
    """Compare the bootstrap summary with the expected results (2 decimals)."""
    bootstrap_output = bootstrap_from_outcomes(
        data=setup["df"],
        outcome=g,
        bootstrap_outcomes=setup["estimates"],
        ci_method="percentile",
    )
    summary = bootstrap_output["summary"]

    # Rounding is the only precision handling that behaves the same across
    # all supported pandas versions.
    afe(summary.round(2), expected["results"].round(2))
def test_process_model_namedtuple():
    """Check that ``_process_model`` leaves a namedtuple model's values intact."""
    column_names = ["value", "pvalue", "ci_lower", "ci_upper"]
    params = pd.DataFrame(columns=column_names)
    for column in column_names:
        params[column] = np.arange(10)
    stats = {"stat1": 0, "stat2": 0}

    processed = _process_model(NamedTup(params=params, info=stats))
    afe(processed.params, params)
    ase(pd.Series(processed.info), pd.Series(stats))
def test_evaluate_criterion_array(minimal_params):
    """Check ``tp._evaluate_criterion`` for a criterion returning an array."""

    def return_array(params, useless_arg):
        return params["value"].to_numpy()

    criterion = expand_criterion_output(return_array)
    extra_kwargs = {"useless_arg": "hello world"}

    fitness, comparison_plot_data, _ = tp._evaluate_criterion(
        criterion, minimal_params, extra_kwargs)

    assert fitness == 13.66666666666666666666666
    # The comparison plot data is expected to equal the params fixture.
    afe(comparison_plot_data, minimal_params)
def test_evaluate_criterion_scalar(minimal_params):
    """Check ``tp._evaluate_criterion`` for a criterion returning a scalar."""

    def crit_func(params, useless_arg):
        return params["value"].mean()

    criterion = expand_criterion_output(crit_func)
    extra_kwargs = {"useless_arg": "hello world"}

    # Expected comparison plot data: a single NaN in a ``value`` column.
    expected_cp_data = pd.DataFrame()
    expected_cp_data["value"] = [np.nan]

    fitness, comparison_plot_data, _ = tp._evaluate_criterion(
        criterion, minimal_params, extra_kwargs)

    assert fitness == 3
    afe(comparison_plot_data, expected_cp_data)
Example #13
0
def test_process_params_df_columns():
    """Check the two-level column labels produced by ``_process_body_df``."""
    df = pd.DataFrame(np.ones((3, 6)), columns=list("abcdef"))
    col_labels = ["c%d" % number for number in range(1, 7)]
    model_groups = {"m3-5": [2, 3, 4]}

    # Process first; the input frame is relabelled afterwards to become the
    # expected value.
    exp = _process_body_df(df, None, None, True, col_labels, model_groups)

    group_row = ["{}", "{}", "{m3-5}", "{m3-5}", "{m3-5}", "{}"]
    name_row = ["{c1}", "{c2}", "{c3}", "{c4}", "{c5}", "{c6}"]
    df.columns = pd.MultiIndex.from_arrays(np.array([group_row, name_row]))

    afe(df, exp, check_dtype=False)
def test_process_frame_indices_columns():
    """Check that column groups and names become a two-level column index."""
    names = list("abc")
    groups = ["first", "first", "second"]
    blank = pd.DataFrame(np.ones((3, 3)), columns=["", "", ""])

    res = _process_frame_indices(
        df=blank,
        custom_index_names=None,
        custom_param_names=None,
        show_col_groups=True,
        show_col_names=True,
        column_names=names,
        column_groups=groups,
    )

    exp = pd.DataFrame(
        data=np.ones((3, 3)),
        columns=[np.array(groups), np.array(names)],
    )
    afe(res, exp, check_dtype=False)
def test_filter_by_match():
    """Check ``Sstar_ECDF.filter_by_match`` at two p-value thresholds.

    With threshold 0.02 one haplotype row is expected; with 0.01 the
    result is expected to be empty.
    """
    windowcalc_text = (
        'chrom\twinstart\twinend\tn_region_ind_snps\tind_id\tpop'
        '\ts_star\tnum_s_star_snps\tn_s_star_snps_hap1\tn_s_star_snps_hap2\n'
        '363104291\t0\t50000\t179\tmsp_110\tEUR\t85315\t10\t2\t8\n'
        '363104291\t0\t50000\t173\tmsp_111\tEUR\t47145\t4\t2\t2\n'
        '363104291\t0\t50000\t174\tmsp_112\tASN\t22028\t4\t2\t2\n'
        '363104291\t0\t50000\t180\tmsp_113\tEUR\t82882\t12\t7\t4\n'
        '363104291\t0\t50000\t184\tmsp_114\tEUR\t111004\t15\t8\t7\n'
        '363104291\t0\t50000\t174\tmsp_116\tEUR\t50976\t5\t4\t1\n'
        '363104291\t0\t50000\t176\tmsp_117\tASN\t70315\t7\t7\t0\n'
        '363104292\t0\t50000\t175\tmsp_118\tEUR\t67973\t3\t0\t3\n'
        '363104291\t0\t50000\t171\tmsp_115\tASN\t0\t0\t0\t0\n')
    window = Sstar_ECDF.process_windowcalc(StringIO(windowcalc_text), 0.5)

    match_text = (
        'chr\tstart\tend\tisc\thaplotype\tpopulation'
        '\tmatch_pct\tpvalue\tmatching_windows\n'
        '363104291\t0\t50000\t199\tmsp_110:1\tEUR\t0.8\t0.01\t2217\n'
        '363104291\t0\t50000\t199\tmsp_110:2\tEUR\t0.8\t0.02\t2217\n')

    filtered = Sstar_ECDF.filter_by_match(window, StringIO(match_text), 0.02)
    single_row = pd.DataFrame({
        'start': [0],
        'end': [50000],
        'n_region_ind_snps': [179],
        'pop': ['EUR'],
        's_star': [85315],
        'msp_ID': ['msp_110:2_363104291'],
    })
    afe(filtered, single_row)

    # result empty as pvalue too low
    filtered = Sstar_ECDF.filter_by_match(window, StringIO(match_text), 0.01)
    no_rows = pd.DataFrame({
        'start': [],
        'end': [],
        'n_region_ind_snps': [],
        'pop': [],
        's_star': [],
        'msp_ID': [],
    })
    afe(filtered, no_rows, check_index_type=False, check_dtype=False)
def test_estimation_table():
    """Check body, footer and notes of ``estimation_table`` for one model.

    ``"python"`` is passed as the second positional argument and notes are
    not appended.
    """
    res = estimation_table([est], "python", append_notes=False)

    body_str = """
        index,{(1)}
        const,152.13$^{*** }$
        ,(2.85)
        Age,37.24$^{ }$
        ,(64.12)
        Sex,-106.58$^{* }$
        ,(62.13)
        BMI,787.18$^{*** }$
        ,(65.42)
        ABP,416.67$^{*** }$
        ,(69.49)
    """
    body_df = _read_csv_string(body_str).fillna("")
    body_df.set_index("index", inplace=True)

    footer_str = """
         ,{(1)}
        Observations,442.0
        R$^2$,0.4
        Adj. R$^2$,0.39
        Residual Std. Error,59.98
        F Statistic,72.91$^{***}$
    """
    footer_df = _read_csv_string(footer_str).fillna("")
    footer_df.set_index(" ", inplace=True)
    footer_df.index.names = [None]
    # Wrap the flat index into a one-level MultiIndex.
    footer_df.index = pd.MultiIndex.from_arrays([footer_df.index])

    notes_tex = "\\midrule\n"
    notes_html = """<tr><td colspan="2" style="border-bottom: 1px solid black">
        </td></tr>"""

    afe(footer_df, res["footer_df"])
    afe(body_df, res["body_df"], check_index_type=False)
    ase(pd.Series(notes_html), pd.Series(res["notes_html"]))
    ase(pd.Series(notes_tex), pd.Series(res["notes_tex"]))
def test_get_params_frames_with_common_index():
    """Check that params frames are aligned to a shared index.

    The second model only covers ``a``-``c``; its expected frame has NaN
    for ``d`` and ``e``.
    """
    models = [
        {
            "params": pd.DataFrame(np.ones(size), index=list(labels)),
            "info": None,
            "name": None,
        }
        for size, labels in ((5, "abcde"), (3, "abc"))
    ]
    res = _get_params_frames_with_common_index(models)

    padded = np.concatenate([np.ones(3), np.ones(2) * np.nan])
    exp = [
        pd.DataFrame(np.ones(5), index=list("abcde")),
        pd.DataFrame(padded, index=list("abcde")),
    ]
    for position, wanted in enumerate(exp):
        afe(res[position], wanted)
Example #18
0
def test_read_pon():
    """Check ``read_pon`` on a small in-memory VCF.

    Only the four leading chromosome-1 records are expected back, as
    ``chrom``/``pos`` columns.
    """
    vcf_text = (
        '##tumor_sample=P9-C3\n'
        '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n'
        '1\t10146\t.\tAC\tA\t.\t.\t.\n'
        '1\t10151\t.\tTA\tT,GA\t.\t.\t.\n'
        '1\t10403\t.\tACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC\tA\t.\t.\t.\n'
        '1\t10415\t.\tACCCTAACCCTAACCCTAACCCTAAC\tA\t.\t.\t.\n'
        'hs37d5\t35466424\t.\tG\tGATTCC\t.\t.\t.\n'
        'hs37d5\t35466456\t.\tC\tT\t.\t.\t.\n'
        'X\t155260422\t.\tAGGGGTTAGGGGTTAG\tAGGGTTAGGGGTTAG,A\t.\t.\t.\n'
        'Y\t2661694\t.\tA\tG\t.\t.\t.\n'
        'MT\t151\t.\tCT\tTT,TC\t.\t.\t.')
    parsed = read_pon(StringIO(vcf_text))

    expected_text = ('chrom\tpos\n'
                     '1\t10146\n'
                     '1\t10151\n'
                     '1\t10403\n'
                     '1\t10415\n')
    expected = pd.read_csv(StringIO(expected_text), sep='\t')
    afe(parsed, expected)
def test_get_params_frames_with_common_index_multiindex():
    """Check alignment of params frames to a shared two-level index.

    The second model only covers the first three index entries; its
    expected frame has NaN for the remaining two.
    """
    mi = pd.MultiIndex.from_tuples(
        [("a", 1), ("a", 2), ("b", 1), ("b", 2), ("b", 3)])
    full_model = {
        "params": pd.DataFrame(np.ones(5), index=mi),
        "info": None,
        "name": None,
    }
    partial_model = {
        "params": pd.DataFrame(np.ones(3), index=mi[:3]),
        "info": None,
        "name": None,
    }
    res = _get_params_frames_with_common_index([full_model, partial_model])

    padded = np.concatenate([np.ones(3), np.ones(2) * np.nan])
    exp = [
        pd.DataFrame(np.ones(5), index=mi),
        pd.DataFrame(padded, index=mi),
    ]
    for position, wanted in enumerate(exp):
        afe(res[position], wanted)
def test_estimation_table():
    """Check body and footer of ``estimation_table`` with ``render_inputs``.

    The footers are compared after sorting their indices; the body is
    compared without checking the index type.
    """
    res = estimation_table([est],
                           return_type="render_inputs",
                           append_notes=False)

    body = """
        index,target
        const,152.00$^{*** }$
        ,(2.85)
        Age,37.20$^{ }$
        ,(64.10)
        Sex,-107.00$^{* }$
        ,(62.10)
        BMI,787.00$^{*** }$
        ,(65.40)
        ABP,417.00$^{*** }$
        ,(69.50)
    """
    expected_body = _read_csv_string(body).fillna("")
    expected_body.set_index("index", inplace=True)

    footer_str = """
         ,target
        R$^2$,0.40
        Adj. R$^2$,0.40
        Residual Std. Error,60
        F Statistic,72.90$^{***}$
        Observations,442

    """
    expected_footer = _read_csv_string(footer_str).fillna("")
    expected_footer.set_index(" ", inplace=True)
    expected_footer.index.names = [None]
    # Wrap the flat index into a one-level MultiIndex.
    expected_footer.index = pd.MultiIndex.from_arrays([expected_footer.index])

    afe(expected_footer.sort_index(), res["footer"].sort_index())
    afe(expected_body, res["body"], check_index_type=False)
Example #21
0
def test_tabulate_pon():
    """Check ``tabulate_pon`` for no overlap, one overlap and range overlaps.

    * A panel with no matching positions must leave the frame unchanged.
    * A panel matching one position adds a ``test`` column with 1/0 values.
    * A second panel adds a ``test2`` column on top of the previous result.
    """

    def read_tsv(text):
        """Parse tab-separated text into a DataFrame."""
        return pd.read_csv(StringIO(text), sep='\t')

    cosmic = read_tsv('Accession Number\tchrom\tstart\tend\n'
                      'ENST297338\t8\t117869609\t117869609\n'
                      'ENST440973\t6\t152129237\t152129237\n'
                      'ENST440974\t6\t152129237\t152129238\n'
                      'ENST440975\t6\t152129236\t152129238\n')
    pon = read_tsv('chrom\tpos\n'
                   '1\t10151\n'
                   '1\t10403\n'
                   '1\t10415\n')

    untouched = tabulate_pon(cosmic, pon, 'test')
    afe(untouched, cosmic)

    pon = read_tsv('chrom\tpos\n'
                   '8\t117869609\n'
                   '1\t10403\n'
                   '1\t10415\n')

    cosmic = tabulate_pon(cosmic, pon, 'test')
    expected = read_tsv(
        'Accession Number\tchrom\tstart\tend\ttest\n'
        'ENST297338\t8\t117869609\t117869609\t1\n'
        'ENST440973\t6\t152129237\t152129237\t0\n'
        'ENST440974\t6\t152129237\t152129238\t0\n'
        'ENST440975\t6\t152129236\t152129238\t0\n')
    afe(cosmic, expected)

    pon = read_tsv('chrom\tpos\n'
                   '8\t117869609\n'
                   '6\t152129235\n'
                   '6\t152129236\n'
                   '6\t152129237\n'
                   '6\t152129238\n'
                   '6\t152129239\n')

    cosmic = tabulate_pon(cosmic, pon, 'test2')
    expected = read_tsv(
        'Accession Number\tchrom\tstart\tend\ttest\ttest2\n'
        'ENST297338\t8\t117869609\t117869609\t1\t1\n'
        'ENST440973\t6\t152129237\t152129237\t0\t1\n'
        'ENST440974\t6\t152129237\t152129238\t0\t2\n'
        'ENST440975\t6\t152129236\t152129238\t0\t3\n')
    afe(cosmic, expected)
Example #22
0
def test_get_bootstrap_samples_from_indices():
    """Check that indices ``[0, 1]`` select the first two rows of the data."""
    data = pd.DataFrame(np.arange(6).reshape(3, 2))
    row_selections = [np.array([0, 1])]
    samples = _get_bootstrap_samples_from_indices(data, row_selections)

    first_two_rows = pd.DataFrame(np.arange(4).reshape(2, 2))
    afe(samples[0], first_two_rows)
def test_filter_by_sstar():
    """Check ``Sstar_ECDF.filter_by_sstar`` across several thresholds.

    The same three-row window frame is filtered with thresholds -1, 0, 0.2
    and 0.4, then an empty window frame is filtered with 0.05.
    """

    def expected_frame(ids):
        """Build the expected start/end/msp_ID frame for the given IDs."""
        return pd.DataFrame({
            'start': [0] * len(ids),
            'end': [50000] * len(ids),
            'msp_ID': list(ids),
        })

    df = pd.DataFrame({
        'start': [0, 0, 0],
        'end': [50000, 50000, 50000],
        'n_region_ind_snps': [179, 179, 178],
        'pop': ['EUR', 'EUR', 'EUR'],
        's_star': [85315, 85314, 85313],
        'msp_ID':
        ['msp_110:2_363104292', 'msp_110:2_363104293', 'msp_110:2_363104291'],
    })

    null_db = Sstar_ECDF.Null_DB()
    null_keys = [
        ('EUR', 179, 25211),
        ('EUR', 179, 85313),
        ('EUR', 179, 85314),
        ('EUR', 179, 85315),
        ('EUR', 179, 85316),
        ('EUR', 178, 25211),
    ]
    index = pd.MultiIndex.from_tuples(
        null_keys, names=['pop', 'n_region_ind_snps', 's_star'])
    null_db.DB = pd.Series([2, 2, 2, 2, 2, 2], index=index)

    # Threshold -> expected IDs.  -1 keeps nothing, 0 keeps only the last
    # input row; note that groupby sorts by pop, then n_region.
    cases = [
        (-1, []),
        (0, ['msp_110:2_363104291']),
        (0.2, ['msp_110:2_363104291', 'msp_110:2_363104292']),
        (0.4, [
            'msp_110:2_363104291', 'msp_110:2_363104292',
            'msp_110:2_363104293'
        ]),
    ]
    for threshold, ids in cases:
        result = Sstar_ECDF.filter_by_sstar(df, null_db, threshold)
        afe(result, expected_frame(ids), check_like=True, check_dtype=False)

    # empty window input
    df = pd.DataFrame({
        'start': [],
        'end': [],
        'n_region_ind_snps': [],
        'pop': [],
        's_star': [],
        'msp_ID': [],
    })
    result = Sstar_ECDF.filter_by_sstar(df, null_db, 0.05)
    afe(result, expected_frame([]), check_like=True, check_dtype=False)
 def test_endpoints(self):
     """Check the logviewer query endpoints against values from ``self.df``.

     Covers, in order, the filter, KPI, top-ten and trend endpoints below
     ``/logviewer/query/aggD``. Each response is compared with the same
     quantity computed directly with pandas.
     """
     self.check('/logviewer/')
     self.check('/logviewer/query/', code=404)
     # check query endpoints
     spec = self.get_keywith(conf.url, 'apps/logviewer/query-')
     base = '/logviewer/query/aggD'
     df = self.df
     # Boolean masks for rows flagged in the ``user.id_1`` / ``uri_1`` columns.
     df_user1 = df['user.id_1'].eq(1)
     df_uri1 = df['uri_1'].eq(1)
     # check filters
     for col in ['status', 'ip']:
         eq_(
             self.get('{}/filter{}/'.format(base, col)).json(), [{
                 col: x
             } for x in sorted(df[col].unique().astype(str))])
     eq_(
         self.get('{}/filter{}/?_limit=10000'.format(base, 'users')).json(),
         [{
             'user.id': x
         } for x in sorted(df[df_user1]['user.id'].unique())])
     # ToDo: See https://github.com/gramener/gramex/issues/252
     ideal = df[df_uri1]['uri'].value_counts().astype(int)[:100]
     ideal = ideal.rename_axis('uri').reset_index(name='views')
     ideal = ideal.sort_values(by=['views', 'uri'], ascending=[False, True])
     ideal.reset_index(inplace=True, drop=True)
     actual = self.get('{}/filter{}/'.format(base, 'uri')).json()
     actual = pd.DataFrame.from_records(actual)
     # Sort both frames identically so ties in ``views`` compare equal.
     actual.sort_values(by=['views', 'uri'],
                        ascending=[False, True],
                        inplace=True)
     actual.reset_index(inplace=True, drop=True)
     afe(actual, ideal)
     # check KPIs
     eq_(
         self.get('{}/kpi-{}/'.format(base, 'pageviews')).json(),
         [{
             'value': len(df[df_uri1].index)
         }])
     eq_(
         self.get('{}/kpi-{}/'.format(base, 'sessions')).json(),
         [{
             'value': df[df_user1]['new_session'].sum()
         }])
     eq_(
         self.get('{}/kpi-{}/'.format(base, 'users')).json(),
         [{
             'value': df[df_user1]['user.id'].nunique()
         }])
     eq_(
         self.get('{}/kpi-{}/'.format(base, 'urls')).json(),
         [{
             'value': df[df_uri1]['uri'].nunique()
         }])
     r = self.get('{}/kpi-{}/'.format(base, 'avgtimespent')).json()
     aae(
         r[0]['value'], df[df_user1]['session_time'].sum() /
         df[df_user1]['new_session'].sum(), 4)
     r = self.get('{}/kpi-{}/'.format(base, 'avgloadtime')).json()
     aae(r[0]['value'], df['duration'].mean(), 4)
     # check top10
     topten = [{
         'col': 'user.id',
         'url': 'users',
         'values': 'views',
         'flag': True
     }, {
         'col': 'ip',
         'url': 'ip',
         'values': 'requests'
     }, {
         'col': 'status',
         'url': 'status',
         'values': 'requests'
     }, {
         'col': 'uri',
         'url': 'uri',
         'values': 'views',
         'flag': True
     }]
     for top in topten:
         # Entries with ``flag`` are restricted to rows whose ``<col>_1``
         # column equals 1; the others use every row.
         cond = (df[top['col'] +
                    '_1'].eq(1) if top.get('flag') else slice(None))
         # ``'records'`` is spelled out: recent pandas releases reject the
         # abbreviated ``'r'`` orient.
         eq_(
             self.get('{}/topten{}/'.format(base, top['url'])).json(),
             (df[cond][top['col']].value_counts().astype(int).rename_axis(
                 top['col']).reset_index(name=top['values']).sort_values(
                     by=[top['values'], top['col']],
                     ascending=[False, True])[:10].to_dict('records')))
     # check trend
     dff = logviewer.pdagg(df[df_uri1], [{
         'key': 'time',
         'freq': 'D'
     }], {'duration': ['count']})
     dff['time'] = dff['time'].dt.strftime('%Y-%m-%d 00:00:00')
     dff['pageviews'] = dff['duration_count'].astype(int)
     dff = dff[dff['pageviews'].ne(0)]
     # ``axis`` is passed by keyword: recent pandas releases no longer accept
     # it positionally in ``DataFrame.drop``.
     eq_(
         self.get('{}/{}/'.format(base, 'pageviewstrend')).json(),
         dff.drop('duration_count', axis=1).to_dict('records'))
     dff = logviewer.pdagg(df[df_user1], [{
         'key': 'time',
         'freq': 'D'
     }], {'new_session': ['sum']})
     dff['time'] = dff['time'].dt.strftime('%Y-%m-%d 00:00:00')
     dff['sessions'] = dff['new_session_sum'].astype(int)
     dff = dff[dff['sessions'].ne(0)]
     eq_(
         self.get('{}/{}/'.format(base, 'sessionstrend')).json(),
         dff.drop('new_session_sum',
                  axis=1).query('sessions != 0').to_dict('records'))
     # TODO trend queries
     for q in spec.kwargs.kwargs.queries.keys():
         if q.endswith('trend'):
             self.check('{}/{}/'.format(base, q))
def test_apply_number_format_int():
    """Check ``_apply_number_format`` when ``number_format`` is the int 3."""
    unformatted = pd.DataFrame(data=["1234.2332", "1.2e+03"])
    expected = pd.DataFrame(data=["1234.233", "1.2e+03"])
    formatted = _apply_number_format(df=unformatted, number_format=3)
    afe(expected, formatted)
def test_apply_number_format_tuple():
    """Check ``_apply_number_format`` when ``number_format`` is a tuple."""
    format_chain = ("{0:.2g}", "{0:.2f}", "{0:.2g}")
    unformatted = pd.DataFrame(data=[1234.2332, 0.0001])
    expected = pd.DataFrame(data=["1.2e+03", "0"])
    formatted = _apply_number_format(df=unformatted,
                                     number_format=format_chain)
    afe(expected, formatted)