def test_encode_numerical_attributes():
    """Check the bins and encoded length of an ``Attribute`` built from ages.

    The attribute is created from ``adults01['age']``.  The test verifies
    that the first entry of ``bins`` is at most 19 and the last is at least
    56, that ``encode()`` returns a result as long as the attribute itself,
    and that ``encode(data=...)`` on a split-off subset returns a result as
    long as that subset.
    """
    from .testdata import adults01

    age_attr = Attribute(adults01['age'])

    # The first bin must not exceed 19 and the last must be at least 56.
    lowest_bin = age_attr.bins[0]
    assert lowest_bin <= 19
    highest_bin = age_attr.bins[-1]
    assert highest_bin >= 56

    # Encoding without arguments yields a result as long as the attribute.
    encoded_all = age_attr.encode()
    assert len(encoded_all) == len(age_attr)

    from sklearn.model_selection import train_test_split

    # Only the first part of the split is used; the second is discarded.
    train_part, _ = train_test_split(adults01['age'])
    encoded_train = age_attr.encode(data=train_part)
    assert len(encoded_train) == len(train_part)
def test_encode_datetime_attributes():
    """Check the encoded length of an ``Attribute`` built from ``'birth'``.

    The ``adults01`` test data is wrapped in a ``DataFrame`` and its
    ``'birth'`` column (presumably datetime values, going by the test name)
    is turned into an ``Attribute``.  The test verifies that ``encode()``
    returns a result as long as the attribute itself.
    """
    from pandas import DataFrame
    from .testdata import adults01

    df = DataFrame(adults01)
    birth_attr = Attribute(df['birth'])

    # Only the encoded length is checked for now; other information about
    # the attribute is not asserted yet.
    encoded = birth_attr.encode()
    assert len(encoded) == len(birth_attr)
def test_encode_categorical_attributes():
    """Check the bins and encoded columns of a categorical ``Attribute``.

    The ``adults01`` test data is wrapped in a ``DataFrame`` and its
    ``'education'`` column is turned into an ``Attribute`` with
    ``categorical=True``.  Both ``bins`` and the columns of the ``encode()``
    result must equal the expected list of education levels.
    """
    from pandas import DataFrame
    from .testdata import adults01

    df = DataFrame(adults01)
    education_attr = Attribute(df['education'], categorical=True)

    expected_levels = [
        '11th',
        '7th-8th',
        '9th',
        'Assoc-acdm',
        'Bachelors',
        'Doctorate',
        'HS-grad',
        'Masters',
        'Some-college',
    ]

    # The attribute's bins are compared against the expected levels.
    assert array_equal(education_attr.bins, expected_levels)

    # The encoded result must expose the same levels as its columns.
    encoded = education_attr.encode()
    assert array_equal(encoded.columns, expected_levels)