def test_check_n_categories_ser():
    """Compare check_n_categories on a single column with the expected count.

    The expectation is the 'n_categories' entry at row label 1 of
    ``good_result``; the function under test receives the ``good['g2']``
    column.
    """
    expected = good_result.loc[1, 'n_categories']
    observed = cf.check_n_categories(good['g2']).values
    # NOTE(review): the comparison is against `.values`, so this presumably
    # relies on the result holding a single element -- confirm.
    assert expected == observed
def test_mostly_same_ser():
    """Check check_mostly_same on a single column against the expected frame.

    Calls ``cf.check_mostly_same`` on ``good['g2']`` with ``thresh=0.4`` and
    requires the result to be ``.equals`` to ``mostly_same_out2``.
    """
    result = cf.check_mostly_same(good['g2'], thresh=0.4)
    assert mostly_same_out2.equals(result)
def test_check_n_categories_df():
    """Check check_n_categories on the whole ``good`` object.

    The returned object must be ``.equals`` to ``good_result``.
    """
    observed = cf.check_n_categories(good)
    assert good_result.equals(observed)
def test_mostly_same_df():
    """Check check_mostly_same on the whole ``good`` object.

    Runs with ``thresh=0.4`` and requires the result to be ``.equals`` to
    ``mostly_same_out1``.
    """
    observed = cf.check_mostly_same(good, thresh=0.4)
    assert mostly_same_out1.equals(observed)
def test_validate_categorical_dtype_bad_ser():
    """Expect a TypeError from the dtype validator on ``bad_type['b1']``.

    The error text must mention the accepted dtypes.
    """
    # presumably bad_type['b1'] holds float data (per the original test
    # comment) -- the fixture is defined outside this block.
    with pytest.raises(TypeError) as excinfo:
        cf._validate_categorical_dtype(bad_type['b1'])

    # The message is inspected after the context manager has captured the
    # exception.
    message = str(excinfo.value)
    assert 'should be of type object or int64' in message
def test_validate_categorical_dtype_good_ser():
    """Run the dtype validator on ``good['g1']``.

    There is no explicit assertion: the test passes as long as the call
    does not raise.
    """
    column = good['g1']
    cf._validate_categorical_dtype(column)
def test_check_n_categories_no_dropna_ser():
    """Compare check_n_categories (dropna=False) on one column with the expectation.

    The expectation is the 'n_categories' entry at row label 0 of
    ``nan_result``; the function under test receives the ``nan['g1']``
    column with ``dropna=False``.
    """
    expected = nan_result.loc[0, 'n_categories']
    observed = cf.check_n_categories(nan['g1'], dropna=False).values
    # NOTE(review): the comparison is against `.values`, so this presumably
    # relies on the result holding a single element -- confirm.
    assert expected == observed
def test_check_n_categories_no_dropna_df():
    """Check check_n_categories with ``dropna=False`` on the whole ``nan`` object.

    The returned object must be ``.equals`` to ``nan_result``.
    """
    observed = cf.check_n_categories(nan, dropna=False)
    assert nan_result.equals(observed)