def test_bin_feature(self):
    """Bin a single column without bin names and check the resulting values.

    With boundaries ``[4, 7]`` applied to ``MATH100`` only, the test expects
    the values 0, 7, 6 to become 0, 2, 1 while ``PHYS125`` stays unchanged.
    """
    rows = [[0, 1], [7, 2], [6, 3]]
    frame = pd.DataFrame(rows, columns=["MATH100", "PHYS125"])
    dataset = DataSet(frame)

    boundaries = [4, 7]
    dataset.bin("MATH100", boundaries)

    # Note that 7 sits exactly on the upper boundary and is expected in bin 2.
    expected = [[0, 1], [2, 2], [1, 3]]
    assert_that(dataset, equals_dataset(expected))
def test_bin_all(self):
    """Bin every column via the ``"*"`` selector using named bins.

    With boundaries ``[4, 7]`` and names low/mid/high, the test expects each
    cell of both columns to be replaced by the matching bin name.
    """
    rows = [[0, 6], [9, 2], [6, 4]]
    frame = pd.DataFrame(rows, columns=["MATH100", "PHYS125"])
    dataset = DataSet(frame)

    boundaries = [4, 7]
    labels = ["low", "mid", "high"]
    dataset.bin("*", boundaries, bin_names=labels)

    # Note that 4 sits exactly on the lower boundary and is expected as "mid".
    expected = [
        ["low", "mid"],
        ["high", "low"],
        ["mid", "mid"],
    ]
    assert_that(dataset, equals_dataset(expected))
def test_bin_feature_floats(self):
    """Bin a float-valued column with a float boundary and named bins.

    Boundaries ``[3.9, 7]`` are applied to ``MATH100`` only; the test expects
    3.5, 9.1, 6.2 to map to low, high, mid and ``PHYS125`` to stay unchanged.
    """
    rows = [[3.5, 1], [9.1, 2], [6.2, 3]]
    frame = pd.DataFrame(rows, columns=["MATH100", "PHYS125"])
    dataset = DataSet(frame)

    boundaries = [3.9, 7]
    labels = ["low", "mid", "high"]
    dataset.bin("MATH100", boundaries, bin_names=labels)

    expected = [["low", 1], ["high", 2], ["mid", 3]]
    assert_that(dataset, equals_dataset(expected))
def test_bin_feature_1_boundary(self):
    """Bin a column using a single boundary, which yields two named bins.

    With the lone boundary ``[3]`` applied to ``MATH100``, the test expects
    0 to map to low and both 9 and 6 to map to high; ``PHYS125`` is unchanged.
    """
    rows = [[0, 1], [9, 2], [6, 3]]
    frame = pd.DataFrame(rows, columns=["MATH100", "PHYS125"])
    dataset = DataSet(frame)

    boundaries = [3]
    labels = ["low", "high"]
    dataset.bin("MATH100", boundaries, bin_names=labels)

    expected = [["low", 1], ["high", 2], ["high", 3]]
    assert_that(dataset, equals_dataset(expected))