def test_guess_dtypes_mixed_types(smalldf):
    """Check the labels ``guess_dtypes`` assigns to the ``smalldf`` fixture.

    Positions 0 and 3 are expected to be 'continuous'; positions 1 and 2
    are expected to be 'categorical'.
    """
    guessed = du.guess_dtypes(smalldf)
    expected = ['continuous', 'categorical', 'categorical', 'continuous']
    # Compare position by position so a failure pinpoints the column.
    for position, label in enumerate(expected):
        assert guessed[position] == label
def test_guess_dtypes_mixed_types_missing_vals(smalldf):
    """Check that NaN entries do not change the labels guessed for ``smalldf``.

    The first row of the first three columns is overwritten with NaN, after
    which positions 0 and 3 are still expected to be 'continuous' and
    positions 1 and 2 'categorical'.
    """
    # ``DataFrame.ix`` was deprecated in pandas 0.20 and removed in 1.0, so
    # use the positional indexer instead.
    # NOTE(review): assumes the fixture's row/column labels coincide with
    # their positions (as the old ``.ix[0, k]`` calls did) -- confirm
    # against the ``smalldf`` fixture.
    for column_position in range(3):
        smalldf.iloc[0, column_position] = float('NaN')

    dtypes = du.guess_dtypes(smalldf)

    assert dtypes[0] == 'continuous'
    assert dtypes[1] == 'categorical'
    assert dtypes[2] == 'categorical'
    assert dtypes[3] == 'continuous'
def test_guess_dtypes_decrease_unique_vals_cutoff():
    """With ``n_unique_cutoff=2``, a 5x4 random frame is all 'continuous'."""
    # Five random draws per column: a large number of unique values
    # relative to the lowered cutoff.
    random_values = np.random.rand(5, 4)
    frame = pd.DataFrame(random_values)
    guessed = du.guess_dtypes(frame, n_unique_cutoff=2)
    assert all(label == 'continuous' for label in guessed)
def test_guess_dtypes_increase_unique_vals_cutoff():
    """With ``n_unique_cutoff=32``, a 30x4 random frame is all 'categorical'."""
    # Thirty random draws per column is a large number of unique values,
    # but the cutoff is raised above the row count.
    random_values = np.random.rand(30, 4)
    frame = pd.DataFrame(random_values)
    guessed = du.guess_dtypes(frame, n_unique_cutoff=32)
    assert all(label == 'categorical' for label in guessed)
def test_guess_dtypes_should_guess_correct_types_continuous_short():
    """With the default cutoff, a 5x4 random frame is all 'categorical'."""
    # Only five rows, so each column has a small number of unique values.
    random_values = np.random.rand(5, 4)
    frame = pd.DataFrame(random_values)
    guessed = du.guess_dtypes(frame)
    assert all(label == 'categorical' for label in guessed)
def test_guess_dtypes_should_guess_correct_types_continuous():
    """With the default cutoff, a 30x4 random frame is all 'continuous'."""
    random_values = np.random.rand(30, 4)
    frame = pd.DataFrame(random_values)
    guessed = du.guess_dtypes(frame)
    assert all(label == 'continuous' for label in guessed)