def test_inversion_more_complicated():
    """Round-trip records with two categorical and two numeric columns.

    Decoding the encoded records must give back the input records, and
    encoding the decoded records again must reproduce the first encoding.
    """
    columns = ('animal', 'color', 'weight', 'height')
    rows = [
        ('cat', 'blue', 6.0, 88.9),
        ('cat', 'red', 3.0, 44.9),
        ('dog', 'yellow', 5.5, 2.5),
        ('fish', 'blue', 7.0, 3233.2),
        ('cat', 'magenta', 2.0, 666.6),
        ('mouse', 'red', 0.0, 55.5),
        ('mouse', 'blah', 99.9, 33),  # note: this height is an int, not a float
    ]
    # Each record is a dict keyed in the order given by ``columns``.
    data = [dict(zip(columns, row)) for row in rows]

    encoder = OneHotEncoder(
        ['animal', 'color'],
        ['weight', 'height'],
        max_levels_default=100,
    )
    encoder.load_from_data_stream(data)

    # Forward then backward: decode(encode(data)) should equal data.
    first_pass = encoder.encode_data(data)
    round_tripped = encoder.decode_data(first_pass)
    assert round_tripped == data

    # Forward again: encoding the decoded records should match the first encoding.
    second_pass = encoder.encode_data(round_tripped)
    assert second_pass == first_pass
def test_inversion():
    """Round-trip records with two categorical columns and one numeric column.

    Decoding the encoded records must give back the input records, and
    encoding the decoded records again must reproduce the first encoding.
    """
    columns = ('animal', 'color', 'weight')
    rows = [
        ('cat', 'blue', 6.0),
        ('cat', 'red', 3.0),
        ('dog', 'yellow', 5.5),
        ('fish', 'blue', 7.0),
        ('cat', 'magenta', 2.0),
        ('mouse', 'purple', 0.0),
        ('mouse', 'black', 99.9),
    ]
    # Each record is a dict keyed in the order given by ``columns``.
    data = [dict(zip(columns, row)) for row in rows]

    encoder = OneHotEncoder(
        ['animal', 'color'],
        ['weight'],
        max_levels_default=100,
    )
    encoder.load_from_data_stream(data)

    # Forward then backward: decode(encode(data)) should equal data.
    first_pass = encoder.encode_data(data)
    round_tripped = encoder.decode_data(first_pass)
    assert round_tripped == data

    # Forward again: encoding the decoded records should match the first encoding.
    second_pass = encoder.encode_data(round_tripped)
    assert second_pass == first_pass
def test_inversion_more_complicated_with_max_levels_diff():
    """Check decoded output when the encoder gets per-column integer settings.

    The encoder is built with ``{'animal': 2, 'color': 1}`` (presumably a
    per-column cap on categorical levels -- confirm against OneHotEncoder).
    The expected decoded records keep only 'cat' and 'mouse' for ``animal``
    and only 'blue' for ``color``; every other categorical value is expected
    to come back as 'UNKNOWN_CATEGORICAL_LEVEL'. The 'extra_junk' key present
    on the first input record is not expected in the decoded output.
    """
    columns = ('animal', 'color', 'weight', 'height')
    unknown = 'UNKNOWN_CATEGORICAL_LEVEL'

    input_rows = [
        ('cat', 'blue', 6.0, 88.9),
        ('cat', 'red', 3.0, 44.9),
        ('dog', 'yellow', 5.5, 2.5),
        ('fish', 'blue', 7.0, 3233.2),
        ('cat', 'magenta', 2.0, 666.6),
        ('mouse', 'red', 0.0, 55.5),
        ('mouse', 'blah', 99.9, 33),  # note: this height is an int, not a float
    ]
    data = [dict(zip(columns, row)) for row in input_rows]
    # Only the first record carries a key the encoder was not told about.
    data[0]['extra_junk'] = 'blah'

    expected_rows = [
        ('cat', 'blue', 6.0, 88.9),
        ('cat', unknown, 3.0, 44.9),
        (unknown, unknown, 5.5, 2.5),
        (unknown, 'blue', 7.0, 3233.2),
        ('cat', unknown, 2.0, 666.6),
        ('mouse', unknown, 0.0, 55.5),
        ('mouse', unknown, 99.9, 33),
    ]
    expected = [dict(zip(columns, row)) for row in expected_rows]

    encoder = OneHotEncoder({'animal': 2, 'color': 1}, ['weight', 'height'])
    encoder.load_from_data_stream(data)

    decoded = encoder.decode_data(encoder.encode_data(data))
    assert decoded == expected