def test_groupby_with_group_named_isnull_pn(self):
    """
    Test case with a group having the same name as a column in TableOne.

    Renames part of the groupby column to each of TableOne's own output
    column names in turn, and checks that an InputError is raised.
    """
    df = self.data_pn.copy()
    columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU']
    groupby = 'ICU'
    group_levels = df[groupby].unique()

    # collect the possible column names (both unadjusted and adjusted runs)
    table = TableOne(df, columns=columns, groupby=groupby, pval=True)
    tableone_columns = list(table.tableone.columns.levels[1])

    table = TableOne(df, columns=columns, groupby=groupby, pval=True,
                     pval_adjust='b')
    tableone_columns = tableone_columns + list(
        table.tableone.columns.levels[1])

    # keep only names that are not already group levels
    tableone_columns = np.unique(tableone_columns)
    tableone_columns = [c for c in tableone_columns
                        if c not in group_levels]

    for c in tableone_columns:
        # for each output column name in tableone, try them as a group
        df.loc[0:20, 'ICU'] = c
        # only use pval_adjust when testing an adjusted column name
        if 'adjust' in c:
            pval_adjust = 'b'
        else:
            pval_adjust = None
        # the clashing group name must be rejected
        with assert_raises(InputError):
            table = TableOne(df, columns=columns, groupby=groupby,
                             pval=True, pval_adjust=pval_adjust)
def test_check_null_counts_are_correct_pn(self):
    """
    Test that the isnull column is correctly reporting number of nulls
    """
    columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
    categorical = ['ICU', 'death']
    groupby = ['death']

    def check_null_counts(t):
        # the isnull column is always the first column of the table
        missing = t.tableone.iloc[:, 0]
        for row, count in enumerate(missing):
            # skip empty rows by checking the value is not a string
            if 'float' in str(type(count)):
                # check each null count is correct
                var = missing.index[row][0]
                assert self.data_pn[var].isnull().sum() == count

    # test when not grouping
    check_null_counts(TableOne(self.data_pn, columns=columns,
                               categorical=categorical))

    # test when grouping by a variable
    check_null_counts(TableOne(self.data_pn, columns=columns,
                               categorical=categorical, groupby=groupby))
def test_compute_standardized_mean_difference_categorical(self):
    """
    Test that pairwise standardized mean difference is computed
    correctly for categorical variables.

    # Ref: Introduction to Meta-Analysis. Michael Borenstein,
    # L. V. Hedges, J. P. T. Higgins and H. R. Rothstein
    # Wiley (2011). Chapter 4. Effect Sizes Based on Means.
    """
    t = TableOne(pd.DataFrame([1, 2, 3]))

    # test with the physionet data
    # NOTE(review): `cols` is defined but not passed to TableOne below
    cols = [
        'Age', 'SysABP', 'Height', 'Weight', 'ICU', 'MechVent', 'LOS',
        'death'
    ]
    categorical = ['ICU', 'MechVent', 'death']
    strata = "MechVent"
    t = TableOne(self.data_pn, categorical=categorical,
                 label_suffix=False, groupby=strata, pval=True,
                 htest_name=False, smd=True)

    # consistent with R StdDiff() and R tableone
    exp_smd = {'ICU': '0.747', 'MechVent': 'nan', 'death': '0.017'}

    for k in exp_smd:
        smd = t.tableone.loc[k, 'Grouped by MechVent']['SMD (0,1)'][0]
        assert_equal(smd, exp_smd[k])
def test_pval_correction(self):
    """
    Test the pval_adjust argument.

    Checks that pval_adjust="bonferroni" multiplies the raw p-values by
    the number of reported values, and that pval_adjust=True behaves the
    same way (it emits a UserWarning, which is suppressed here).
    """
    df = pd.DataFrame({
        'numbers': [1, 2, 6, 1, 1, 1],
        'other': [1, 2, 3, 3, 3, 4],
        'colors': ['red', 'white', 'blue', 'red', 'blue', 'blue'],
        'even': ['yes', 'no', 'yes', 'yes', 'no', 'yes']
    })

    t1 = TableOne(df, groupby="even", pval=True, pval_adjust="bonferroni")

    # check the multiplier is correct (3 = no. of reported values)
    pvals_expected = {
        'numbers, mean (SD)': '1.000',
        'other, mean (SD)': '1.000',
        'colors, n (%)': '0.669'
    }

    group = 'Grouped by even'
    col = 'P-Value (adjusted)'
    for k in pvals_expected:
        assert_equal(t1.tableone.loc[k][group][col].values[0],
                     pvals_expected[k])

    # catch the pval_adjust=True
    with warnings.catch_warnings(record=False) as w:
        warnings.simplefilter('ignore', category=UserWarning)
        t2 = TableOne(df, groupby="even", pval=True, pval_adjust=True)
        for k in pvals_expected:
            # bug fix: this loop previously re-asserted against t1,
            # which was already verified above; the point of this branch
            # is to validate the table built with pval_adjust=True (t2).
            assert_equal(t2.tableone.loc[k][group][col].values[0],
                         pvals_expected[k])
def test_tableone_columns_in_consistent_order_pn(self):
    """
    Test output columns in TableOne are always in the same order.

    NOTE(review): a later test method in this file shares this name and
    checks the newer column labels ('Missing'/'Test'); under unittest
    only the last definition with a given name is collected — confirm
    which one is intended to run.
    """
    df = self.data_pn.copy()
    columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
    # NOTE(review): `categorical` is defined but not passed to TableOne
    categorical = ['ICU', 'death']
    groupby = ['death']

    table = TableOne(df, columns=columns, groupby=groupby, pval=True)

    # unadjusted run: isnull first; pval and ptest are the last columns
    assert table.tableone.columns.levels[1][0] == 'isnull'
    assert table.tableone.columns.levels[1][-1] == 'ptest'
    assert table.tableone.columns.levels[1][-2] == 'pval'

    # recode the death=0 group as 2
    df.loc[df['death'] == 0, 'death'] = 2

    table = TableOne(df, columns=columns, groupby=groupby, pval=True,
                     pval_adjust='bonferroni')

    # adjusted run: the p-value column label gains ' (adjusted)'
    assert table.tableone.columns.levels[1][0] == 'isnull'
    assert table.tableone.columns.levels[1][-1] == 'ptest'
    assert table.tableone.columns.levels[1][-2] == 'pval (adjusted)'
def test_custom_statistical_tests(self):
    """
    Test that the user can specify custom statistical functions.

    Builds three two-sample datasets (different, similar, identical
    distributions) and checks that the p-value reported by TableOne via
    the custom `htest` function matches scipy's ks_2samp directly.
    """
    # from the example provided at:
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html

    # define custom test (`mytest` is a helper defined elsewhere in this file)
    func = mytest

    np.random.seed(12345678)
    n1 = 200
    n2 = 300

    # Baseline distribution
    rvs1 = stats.norm.rvs(size=n1, loc=0., scale=1)
    df1 = pd.DataFrame({'rvs': 'rvs1', 'val': rvs1})

    # Different to rvs1
    # stats.ks_2samp(rvs1, rvs2)
    # (0.20833333333333334, 5.129279597781977e-05)
    rvs2 = stats.norm.rvs(size=n2, loc=0.5, scale=1.5)
    df2 = pd.DataFrame({'rvs': 'rvs2', 'val': rvs2})

    # Similar to rvs1
    # stats.ks_2samp(rvs1, rvs3)
    # (0.10333333333333333, 0.14691437867433876)
    rvs3 = stats.norm.rvs(size=n2, loc=0.01, scale=1.0)
    df3 = pd.DataFrame({'rvs': 'rvs3', 'val': rvs3})

    # Identical to rvs1
    # stats.ks_2samp(rvs1, rvs4)
    # (0.07999999999999996, 0.41126949729859719)
    rvs4 = stats.norm.rvs(size=n2, loc=0.0, scale=1.0)
    df4 = pd.DataFrame({'rvs': 'rvs4', 'val': rvs4})

    # fix: DataFrame.append was deprecated in pandas 1.4 and removed in
    # pandas 2.0; pd.concat(..., ignore_index=True) is the equivalent.

    # Table 1 for different distributions
    different = pd.concat([df1, df2], ignore_index=True)
    t1_diff = TableOne(data=different, columns=["val"], pval=True,
                       groupby="rvs", htest={"val": func})
    assert_almost_equal(t1_diff._htest_table['P-Value'].val,
                        stats.ks_2samp(rvs1, rvs2)[1])

    # Table 1 for similar distributions
    similar = pd.concat([df1, df3], ignore_index=True)
    t1_similar = TableOne(data=similar, columns=["val"], pval=True,
                          groupby="rvs", htest={"val": func})
    assert_almost_equal(t1_similar._htest_table['P-Value'].val,
                        stats.ks_2samp(rvs1, rvs3)[1])

    # Table 1 for identical distributions
    identical = pd.concat([df1, df4], ignore_index=True)
    t1_identical = TableOne(data=identical, columns=["val"], pval=True,
                            groupby="rvs", htest={"val": func})
    assert_almost_equal(t1_identical._htest_table['P-Value'].val,
                        stats.ks_2samp(rvs1, rvs4)[1])
def test_examples_used_in_the_readme_run_without_raising_error(self):
    """Smoke test: the README example for the PBC data builds cleanly."""
    columns = ['time', 'age', 'bili', 'chol', 'albumin', 'copper',
               'alk.phos', 'ast', 'trig', 'platelet', 'protime',
               'status', 'ascites', 'hepato', 'spiders', 'edema',
               'stage', 'sex']
    catvars = ['status', 'ascites', 'hepato', 'spiders', 'edema',
               'stage', 'sex']
    groupby = 'trt'
    nonnormal = ['bili']

    # build the table both without and with p-values
    for with_pval in (False, True):
        mytable = TableOne(self.data_pbc, columns, catvars, groupby,
                           nonnormal, pval=with_pval)
def test_order_of_order_categorical_columns(self):
    """
    Test that the order of ordered categorical columns is retained.
    """
    # 'day' and 'month' are ordered categoricals; 'alph' is unordered
    day_cat = pd.Categorical(["mon", "wed", "tue", "thu"],
                             categories=["wed", "thu", "mon", "tue"],
                             ordered=True)
    alph_cat = pd.Categorical(["a", "b", "c", "a"],
                              categories=["b", "c", "d", "a"],
                              ordered=False)
    mon_cat = pd.Categorical(["jan", "feb", "mar", "apr"],
                             categories=["feb", "jan", "mar", "apr"],
                             ordered=True)

    data = pd.DataFrame({"A": ["a", "b", "c", "a"]})
    data["day"] = day_cat
    data["alph"] = alph_cat
    data["month"] = mon_cat

    # custom order: listed values first, remaining categories after
    order = {"month": ["jan"], "day": ["mon", "tue", "wed"]}

    # if a custom order is not specified, the categorical order
    # specified above should apply
    t1 = TableOne(data, label_suffix=False)

    t1_expected_order = {
        'month': ["feb", "jan", "mar", "apr"],
        'day': ["wed", "thu", "mon", "tue"]
    }

    for k in order:
        assert_list_equal(t1._order[k], t1_expected_order[k])
        assert_list_equal(t1.tableone.loc[k].index.to_list(),
                          t1_expected_order[k])

    # if a desired order is set, it should override the order
    t2 = TableOne(data, order=order, label_suffix=False)

    t2_expected_order = {
        'month': ["jan", "feb", "mar", "apr"],
        'day': ["mon", "tue", "wed", "thu"]
    }

    for k in order:
        assert_list_equal(t2._order[k], t2_expected_order[k])
        assert_list_equal(t2.tableone.loc[k].index.to_list(),
                          t2_expected_order[k])
def test_tableone_columns_in_consistent_order_pn(self):
    """
    Test output columns in TableOne are always in the same order.

    NOTE(review): an earlier test method in this file shares this name
    (checking the older 'isnull'/'ptest' labels); under unittest only
    the last definition with a given name is collected.
    """
    df = self.data_pn.copy()
    columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
    # NOTE(review): `categorical` is defined but not passed to TableOne
    categorical = ['ICU', 'death']
    groupby = ['death']

    table = TableOne(df, columns=columns, groupby=groupby, pval=True,
                     htest_name=True, overall=False)

    # Missing is first; Test and P-Value are the last columns
    assert table.tableone.columns.levels[1][0] == 'Missing'
    assert table.tableone.columns.levels[1][-1] == 'Test'
    assert table.tableone.columns.levels[1][-2] == 'P-Value'

    # recode the death=0 group as 2
    df.loc[df['death'] == 0, 'death'] = 2

    # without overall column
    table = TableOne(df, columns=columns, groupby=groupby, pval=True,
                     pval_adjust='bonferroni', htest_name=True,
                     overall=False)

    assert table.tableone.columns.levels[1][0] == 'Missing'
    assert table.tableone.columns.levels[1][-1] == 'Test'
    assert table.tableone.columns.levels[1][-2] == 'P-Value (adjusted)'

    # with overall column
    table = TableOne(df, columns=columns, groupby=groupby, pval=True,
                     pval_adjust='bonferroni', htest_name=True,
                     overall=True)

    # Overall appears immediately after Missing
    assert table.tableone.columns.levels[1][0] == 'Missing'
    assert table.tableone.columns.levels[1][1] == 'Overall'
    assert table.tableone.columns.levels[1][-1] == 'Test'
    assert table.tableone.columns.levels[1][-2] == 'P-Value (adjusted)'
def test_limit_of_categorical_data_pn(self):
    """
    Tests the `limit` keyword arg, which limits the number of categories
    presented
    """
    data_pn = self.data_pn.copy()

    # derive six age categories based on decade
    data_pn['age_group'] = data_pn['Age'].apply(lambda age: int(age / 10))

    # test it limits to 3
    t = TableOne(data_pn,
                 columns=['age_group', 'Age', 'SysABP', 'Height',
                          'Weight', 'ICU', 'death'],
                 categorical=['age_group', 'ICU', 'death'],
                 limit=3, label_suffix=False)

    # only three age_group categories should be displayed
    assert t.tableone.loc['age_group', :].shape[0] == 3

    # other categoricals are not affected if limit > num categories
    assert t.tableone.loc['death', :].shape[0] == 2
def test_label_dictionary_input_pn(self):
    """
    Test columns and rows are relabelled with the label argument
    """
    df = self.data_pn.copy()
    labels = {'death': 'mortality',
              'Age': 'Age, years',
              'ICU': 'Intensive Care Unit'}

    t = TableOne(df, columns=['Age', 'ICU', 'death'],
                 categorical=['death', 'ICU'], groupby='death',
                 labels=labels)

    # the header column (groupby variable) is relabelled
    assert t.tableone.columns.levels[0][0] == 'Grouped by mortality'

    # the categorical rows are relabelled
    assert 'Intensive Care Unit' in t.tableone.index.levels[0]

    # the continuous rows are relabelled
    assert 'Age, years' in t.tableone.index.levels[0]
def test_min_max_for_nonnormal_variables(self): """ Test the min_max argument returns expected results. """ # columns to summarize columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death'] # columns containing categorical variables categorical = ['ICU'] # set decimal places for age to 0 decimals = {"Age": 0} # non-normal variables nonnormal = ['Age'] # optionally, a categorical variable for stratification groupby = ['death'] t1 = TableOne(self.data_pn, columns=columns, categorical=categorical, groupby=groupby, nonnormal=nonnormal, decimals=decimals, min_max=['Age']) k = "Age, median [min,max]" group = "Grouped by death" t1_columns = ["Overall", "0", "1"] expected = ["68 [16,90]", "66 [16,90]", "75 [26,90]"] for c, e in zip(t1_columns, expected): cell = t1.tableone.loc[k][group][c].values[0] assert_equal(cell, e)
def test_statistical_tests_skipped_if_subgroups_have_zero_observations(self):
    """
    Ensure that the package skips running statistical tests if the
    subgroups have zero observations
    """
    t = TableOne(self.data_sample, categorical=['likesmarmalade'],
                 groupby='bear', pval=True)

    # the variable should be marked as not tested
    testname = t._significance_table.loc['likesmarmalade', 'testname']
    assert testname == 'Not tested'
def test_examples_used_in_the_readme_run_without_raising_error_pn(self):
    """Smoke test: the README example for the PhysioNet data builds cleanly."""
    mytable = TableOne(self.data_pn,
                       columns=['Age', 'SysABP', 'Height', 'Weight',
                                'ICU', 'death'],
                       categorical=['ICU', 'death'],
                       groupby=['death'],
                       nonnormal=['Age'],
                       pval=False)
def test_with_data_as_only_input_argument(self):
    """
    Test with a simple dataset that a table generated with no
    pre-specified columns returns the same results as a table generated
    with specified columns
    """
    table_no_args = TableOne(self.data_groups)

    columns = ['group', 'age', 'weight']
    categorical = ['group']
    table_with_args = TableOne(self.data_groups, columns=columns,
                               categorical=categorical)

    # auto-detected columns/categoricals should match the explicit ones
    assert table_no_args._columns == table_with_args._columns
    assert table_no_args._categorical == table_with_args._categorical
    assert table_no_args._remarks == table_with_args._remarks

    # the rendered tables should be identical
    assert (table_no_args.tableone.columns ==
            table_with_args.tableone.columns).all()
    assert (table_no_args.tableone['overall'].values ==
            table_with_args.tableone['overall'].values).all()
    assert (table_no_args.tableone == table_with_args.tableone).all().all()
def test_overall_mean_and_std_as_expected_for_cont_variable(self):
    """
    The overall mean/std of the simulated normal column should be close
    to the simulation parameters (self.mu, self.sigma).
    """
    t = TableOne(self.data_sample,
                 columns=['normal', 'nonnormal', 'height'])

    summary = t._cont_describe.loc['normal']

    # sample statistics should be within 0.02 of the true parameters
    assert abs(summary['mean']['overall'] - self.mu) <= 0.02
    assert abs(summary['std']['overall'] - self.sigma) <= 0.02
def test_fisher_exact_for_small_cell_count(self):
    """
    Ensure that the package runs Fisher exact if cell counts are <=5
    and it's a 2x2
    """
    categorical = ['group1', 'group3']
    table = TableOne(self.data_small, categorical=categorical,
                     groupby='group2', pval=True)

    # group2 should be tested because it's a 2x2
    # group3 is a 2x3 so should not be tested
    # NOTE(review): 'Fisher''s exact' is implicit string concatenation
    # and evaluates to "Fishers exact" (no apostrophe) — confirm this
    # matches the label actually emitted by TableOne.
    assert table._significance_table.loc['group1', 'ptest'] == 'Fisher''s exact'
    assert table._significance_table.loc['group3', 'ptest'] == 'Chi-squared (warning: expected count < 5)'
def test_robust_to_duplicates_in_input_df_index(self):
    """
    TableOne should reject a dataframe whose index contains duplicate
    labels, and produce correct output once the index is reset.
    """
    d_control = pd.DataFrame(data={'group': [0, 0, 0, 0, 0, 0, 0],
                                   'value': [3, 4, 4, 4, 4, 4, 5]})
    d_case = pd.DataFrame(data={'group': [1, 1, 1], 'value': [1, 2, 3]})

    # concatenating without ignore_index leaves duplicate index labels
    d = pd.concat([d_case, d_control])

    with assert_raises(InputError):
        t = TableOne(d, ['value'], groupby='group', pval=True)

    # resetting the index makes the same data acceptable
    d_idx_reset = pd.concat([d_case, d_control], ignore_index=True)
    t2 = TableOne(d_idx_reset, ['value'], groupby='group', pval=True)

    header = "Grouped by group"
    mean_std_0 = t2.tableone[header].at[("value, mean (SD)", ""), "0"]
    mean_std_1 = t2.tableone[header].at[("value, mean (SD)", ""), "1"]

    assert mean_std_0 == '4.0 (0.6)'
    assert mean_std_1 == '2.0 (1.0)'
def test_categorical_cell_count(self):
    """
    Check the categorical cell counts are correct.

    (Docstring fixed: the previous text was copy-pasted from the Fisher
    exact test and did not describe this test.)
    """
    categorical = list(np.arange(10))
    table = TableOne(self.data_categorical, columns=categorical,
                     categorical=categorical)

    # each column
    for i in np.arange(10):
        # each category should have 100 levels
        assert table._cat_describe['overall'].loc[i].shape[0] == 100
def test_string_data_as_continuous_error(self):
    """
    Test raising an error when continuous columns contain non-numeric
    data.

    With categorical=[], every column is treated as continuous, so the
    mixed-type data must trigger an InputError with a message that
    names the offending column(s).
    """
    starts_str = "The following continuous column(s) have non-numeric values"
    try:
        # Trigger the categorical warning
        table = TableOne(self.data_mixed, categorical=[])
    except InputError as e:
        assert e.args[0].startswith(starts_str)
    else:
        # bug fix: previously the test silently passed when no error
        # was raised; an InputError is the expected outcome here.
        # (Unexpected exception types propagate naturally, so the old
        # bare `except: raise` clause was redundant and is removed.)
        raise AssertionError(
            "expected InputError for non-numeric continuous columns")
def test_overall_n_and_percent_as_expected_for_binary_cat_variable_with_nan(self):
    """
    Ignore NaNs when counting the number of values and the overall
    percentage
    """
    cat = ['likeshoney']
    t = TableOne(self.data_sample, columns=cat, categorical=cat)

    honey = t._cat_describe['overall'].loc['likeshoney']

    # NaNs are excluded from the frequency count
    assert honey.loc[1.0, 'freq'] == 5993

    # the single level accounts for ~100% of the non-null values
    assert abs(100 - honey.loc[1.0, 'percent']) <= 0.01
def test_categorical_cell_count(self):
    """
    Check the categorical cell counts are correct
    """
    cols = list(np.arange(10))
    t = TableOne(self.data_categorical, columns=cols, categorical=cols)

    summary = t.cat_describe
    # drop 'overall' level of column index
    summary.columns = summary.columns.droplevel(level=1)

    # each of the 10 columns should have 100 categories
    for col in np.arange(10):
        assert summary.loc[col].shape[0] == 100
def test_tableone_row_sort_pn(self):
    """
    Test sort functionality of TableOne
    """
    df = self.data_pn.copy()
    columns = ['Age', 'SysABP', 'Height', 'Weight', 'ICU', 'death']
    table = TableOne(df, columns=columns)

    # a call to .index.levels[0] automatically sorts the levels
    # instead, call values and use pd.unique as it preserves order
    tableone_rows = pd.unique([x[0] for x in table.tableone.index.values])

    # default should not sort
    for i, c in enumerate(columns):
        # i+1 because we skip the first row, 'n'
        assert tableone_rows[i + 1] == c

    # with sort=True, rows should appear in alphabetical order
    table = TableOne(df, columns=columns, sort=True)
    tableone_rows = pd.unique([x[0] for x in table.tableone.index.values])
    for i, c in enumerate(np.sort(columns)):
        # i+1 because we skip the first row, 'n'
        assert tableone_rows[i + 1] == c
def test_overall_mean_and_std_as_expected_for_cont_variable(self):
    """
    Check the overall mean and standard deviation of a continuous
    variable against pandas' own estimates for the same column.
    """
    columns = ['normal', 'nonnormal', 'height']
    table = TableOne(self.data_sample, columns=columns)

    mean = table.cont_describe.loc['normal']['mean']['Overall']
    std = table.cont_describe.loc['normal']['std']['Overall']

    # fix: removed leftover debug print statements that spammed the
    # test output (print(self.data_sample.mean()/std()))
    assert abs(mean - self.data_sample.normal.mean()) <= 0.02
    assert abs(std - self.data_sample.normal.std()) <= 0.02
def test_sequence_of_cont_table(self):
    """
    Ensure that the columns align with the values.

    NOTE(review): a later test method in this file shares this name and
    uses label-based indexing; under unittest only the last definition
    is collected.
    """
    columns = ['age', 'weight']
    categorical = []
    groupby = 'group'
    t = TableOne(self.data_groups, columns=columns,
                 categorical=categorical, groupby=groupby, isnull=False)

    # n and weight rows are already ordered, so sorting should not
    # alter the order
    # NOTE(review): positional access t.tableone[0] relies on an older
    # column layout — confirm against the installed tableone version.
    assert t.tableone[0][1:] == sorted(t.tableone[0][1:])
    assert t.tableone[1][1:] == ['0.50 (0.71)', '3.50 (1.29)',
                                 '8.50 (1.87)', '15.50 (2.45)']
    assert t.tableone[2][1:] == sorted(t.tableone[2][1:])
def df_to_table(df, incl_vars, categorical, nonnormal, groupvar, pval,
                labels, order, missing):
    """
    Build a TableOne summary for ``df`` and return it rendered as HTML.

    Parameters mirror the TableOne constructor: ``incl_vars`` are the
    columns to summarize, ``categorical``/``nonnormal`` classify them,
    ``groupvar`` stratifies, ``pval`` toggles significance testing,
    ``labels``/``order`` control presentation and ``missing`` toggles
    the missing-count column.

    Returns the HTML string produced by ``TableOne.to_html``.
    """
    # fix: removed unused `df_cols = list(df)` and replaced the identity
    # comprehension with an explicit list() copy
    # this is the list of columns for which Table 1 is generated
    col_list = list(incl_vars)

    my_table = TableOne(df, columns=col_list, groupby=groupvar,
                        nonnormal=nonnormal, categorical=categorical,
                        pval=pval, missing=missing, label_suffix=True,
                        rename=labels, order=order)

    my_table_html = my_table.to_html(
        classes=["table", "table-dark", 'table-sm'])
    return my_table_html
def test_row_percent_false(self): """ Test row_percent=False displays n(%) for the column. """ # columns to summarize columns = ['Age', 'SysABP', 'Height', 'MechVent', 'ICU', 'death'] # columns containing categorical variables categorical = ['ICU', 'MechVent'] # set decimal places for age to 0 decimals = {"Age": 0} # non-normal variables nonnormal = ['Age'] # optionally, a categorical variable for stratification groupby = ['death'] group = "Grouped by death" # row_percent = False t1 = TableOne(self.data_pn, columns=columns, categorical=categorical, groupby=groupby, nonnormal=nonnormal, decimals=decimals, row_percent=False) row1 = list(t1.tableone.loc["MechVent, n (%)"][group].values[0]) row1_expect = [0, '540 (54.0)', '468 (54.2)', '72 (52.9)'] assert_list_equal(row1, row1_expect) row2 = list(t1.tableone.loc["MechVent, n (%)"][group].values[1]) row2_expect = ['', '460 (46.0)', '396 (45.8)', '64 (47.1)'] assert_list_equal(row2, row2_expect) row3 = list(t1.tableone.loc["ICU, n (%)"][group].values[0]) row3_expect = [0, '162 (16.2)', '137 (15.9)', '25 (18.4)'] assert_list_equal(row3, row3_expect) row4 = list(t1.tableone.loc["ICU, n (%)"][group].values[1]) row4_expect = ['', '202 (20.2)', '194 (22.5)', '8 (5.9)'] assert_list_equal(row4, row4_expect) row5 = list(t1.tableone.loc["ICU, n (%)"][group].values[2]) row5_expect = ['', '380 (38.0)', '318 (36.8)', '62 (45.6)'] assert_list_equal(row5, row5_expect) row6 = list(t1.tableone.loc["ICU, n (%)"][group].values[3]) row6_expect = ['', '256 (25.6)', '215 (24.9)', '41 (30.1)'] assert_list_equal(row6, row6_expect)
def test_row_percent_true_and_overall_false(self): """ Test row_percent=True displays n(%) for the row rather than the column. """ # columns to summarize columns = ['Age', 'SysABP', 'Height', 'MechVent', 'ICU', 'death'] # columns containing categorical variables categorical = ['ICU', 'MechVent'] # set decimal places for age to 0 decimals = {"Age": 0} # non-normal variables nonnormal = ['Age'] # optionally, a categorical variable for stratification groupby = ['death'] group = "Grouped by death" # row_percent = True t1 = TableOne(self.data_pn, columns=columns, overall=False, categorical=categorical, groupby=groupby, nonnormal=nonnormal, decimals=decimals, row_percent=True) row1 = list(t1.tableone.loc["MechVent, n (%)"][group].values[0]) row1_expect = [0, '468 (86.7)', '72 (13.3)'] assert_list_equal(row1, row1_expect) row2 = list(t1.tableone.loc["MechVent, n (%)"][group].values[1]) row2_expect = ['', '396 (86.1)', '64 (13.9)'] assert_list_equal(row2, row2_expect) row3 = list(t1.tableone.loc["ICU, n (%)"][group].values[0]) row3_expect = [0, '137 (84.6)', '25 (15.4)'] assert_list_equal(row3, row3_expect) row4 = list(t1.tableone.loc["ICU, n (%)"][group].values[1]) row4_expect = ['', '194 (96.0)', '8 (4.0)'] assert_list_equal(row4, row4_expect) row5 = list(t1.tableone.loc["ICU, n (%)"][group].values[2]) row5_expect = ['', '318 (83.7)', '62 (16.3)'] assert_list_equal(row5, row5_expect) row6 = list(t1.tableone.loc["ICU, n (%)"][group].values[3]) row6_expect = ['', '215 (84.0)', '41 (16.0)'] assert_list_equal(row6, row6_expect)
def test_sequence_of_cont_table(self):
    """
    Ensure that the columns align with the values.
    """
    columns = ['age', 'weight']
    categorical = []
    groupby = 'group'
    t = TableOne(self.data_groups, columns=columns,
                 categorical=categorical, groupby=groupby, isnull=False)

    # n and weight rows are already ordered, so sorting should not
    # alter the order
    # NOTE(review): .any() makes these assertions weak — a single
    # matching element passes; .all() would check full alignment.
    # Left unchanged here.
    assert (t.tableone.loc['n'].values[0].astype(float) ==
            sorted(t.tableone.loc['n'].values[0].astype(float))).any()
    assert (t.tableone.loc['age'].values[0] ==
            ['0.50 (0.71)', '3.50 (1.29)', '8.50 (1.87)',
             '15.50 (2.45)']).any()
def test_overall_n_and_percent_as_expected_for_binary_cat_variable(self):
    """
    Frequencies and percentages for a binary categorical variable
    should cover the full sample.
    """
    cat = ['likesmarmalade']
    t = TableOne(self.data_sample, columns=cat, categorical=cat)

    marmalade = t._cat_describe['overall'].loc['likesmarmalade']
    freq_no = marmalade.loc[0, 'freq']
    pct_no = marmalade.loc[0, 'percent']
    freq_yes = marmalade.loc[1, 'freq']
    pct_yes = marmalade.loc[1, 'percent']

    # the two levels together account for all 10000 rows and ~100%
    assert freq_no + freq_yes == 10000
    assert abs(100 - pct_no - pct_yes) <= 0.02

    # exact counts for each level
    assert freq_no == 8977
    assert freq_yes == 1023