def test_inversion_more_complicated():
    """Round-trip records with two categorical and two numeric fields.

    Decoding the encoded records must give back the input records, and
    encoding that decoded output again must equal the first encoding.
    """
    field_names = ('animal', 'color', 'weight', 'height')
    rows = [
        ('cat', 'blue', 6.0, 88.9),
        ('cat', 'red', 3.0, 44.9),
        ('dog', 'yellow', 5.5, 2.5),
        ('fish', 'blue', 7.0, 3233.2),
        ('cat', 'magenta', 2.0, 666.6),
        ('mouse', 'red', 0.0, 55.5),
        ('mouse', 'blah', 99.9, 33),  # height is an int in this row
    ]
    # Keys are inserted in field_names order for every record.
    records = [dict(zip(field_names, row)) for row in rows]

    encoder = OneHotEncoder(['animal', 'color'], ['weight', 'height'],
                            max_levels_default=100)
    encoder.load_from_data_stream(records)

    first_pass = encoder.encode_data(records)

    # Forward then backward must be lossless ...
    round_tripped = encoder.decode_data(first_pass)
    assert round_tripped == records

    # ... and encoding the decoded records must match the first encoding.
    second_pass = encoder.encode_data(round_tripped)
    assert second_pass == first_pass
def test_save_load():
    """Check that an encoder restored from disk encodes like the original.

    An encoder is loaded from the sample records, saved to a file, and a
    second encoder (constructed with empty column lists) is loaded from
    that file. Both must produce equal encodings for the same records.
    """
    # Function-scope imports keep this test self-contained.
    import os
    import tempfile

    encoder = OneHotEncoder(['animal', 'color'], ['weight'],
                            max_levels_default=100)

    data = [{
        'animal': 'cat',
        'color': 'blue',
        'weight': 6.0
    }, {
        'animal': 'cat',
        'color': 'red',
        'weight': 3.0
    }, {
        'animal': 'dog',
        'color': 'yellow',
        'weight': 5.5
    }, {
        'animal': 'fish',
        'color': 'blue',
        'weight': 7.0
    }, {
        'animal': 'cat',
        'color': 'magenta',
        'weight': 2.0
    }, {
        'animal': 'mouse',
        'color': 'purple',
        'weight': 0.0
    }, {
        'animal': 'mouse',
        'color': 'black',
        'weight': 99.9
    }]

    encoder.load_from_data_stream(data)

    encoded_data = encoder.encode_data(data)

    # Save into a private temporary directory: the path cannot collide with
    # a file created by another process, and everything written under it is
    # removed when the block exits, even if an assertion or call fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, 'encoder')

        encoder.save(filename)

        encoder_from_file = OneHotEncoder([], [])
        encoder_from_file.load_from_file(filename)

        # Encode while the file still exists, in case loading is lazy.
        encoded_data_from_file = encoder_from_file.encode_data(data)

    assert encoded_data == encoded_data_from_file
def test_inversion():
    """Round-trip records with two categorical fields and one numeric field.

    Decoding the encoded records must give back the input records, and
    encoding that decoded output again must equal the first encoding.
    """
    # The fixture is written column-wise; position i across the three
    # lists forms record i.
    animals = ['cat', 'cat', 'dog', 'fish', 'cat', 'mouse', 'mouse']
    colors = ['blue', 'red', 'yellow', 'blue', 'magenta', 'purple', 'black']
    weights = [6.0, 3.0, 5.5, 7.0, 2.0, 0.0, 99.9]
    samples = [
        {'animal': animal, 'color': color, 'weight': weight}
        for animal, color, weight in zip(animals, colors, weights)
    ]

    encoder = OneHotEncoder(['animal', 'color'], ['weight'],
                            max_levels_default=100)
    encoder.load_from_data_stream(samples)

    encoded = encoder.encode_data(samples)

    decoded = encoder.decode_data(encoded)
    assert decoded == samples

    re_encoded = encoder.encode_data(decoded)
    assert re_encoded == encoded
def test_inversion_more_complicated_with_max_levels_diff():
    """Decode after encoding with a different max level count per column.

    The encoder is built with ``{'animal': 2, 'color': 1}``. In the expected
    output only 'cat' and 'mouse' survive for ``animal`` and only 'blue'
    for ``color``; every other categorical value comes back as
    'UNKNOWN_CATEGORICAL_LEVEL'. The 'extra_junk' key on the first input
    record does not appear in the expected output.
    """
    unknown = 'UNKNOWN_CATEGORICAL_LEVEL'
    columns = ('animal', 'color', 'weight', 'height')

    input_rows = [
        ('cat', 'blue', 6.0, 88.9),
        ('cat', 'red', 3.0, 44.9),
        ('dog', 'yellow', 5.5, 2.5),
        ('fish', 'blue', 7.0, 3233.2),
        ('cat', 'magenta', 2.0, 666.6),
        ('mouse', 'red', 0.0, 55.5),
        ('mouse', 'blah', 99.9, 33),  # height is an int in this row
    ]
    records = [dict(zip(columns, row)) for row in input_rows]
    # Only the first record carries a field the encoder was not told about.
    records[0]['extra_junk'] = 'blah'

    # Same rows as above, with the values that are expected to be lost
    # replaced by the unknown-level marker.
    expected_rows = [
        ('cat', 'blue', 6.0, 88.9),
        ('cat', unknown, 3.0, 44.9),
        (unknown, unknown, 5.5, 2.5),
        (unknown, 'blue', 7.0, 3233.2),
        ('cat', unknown, 2.0, 666.6),
        ('mouse', unknown, 0.0, 55.5),
        ('mouse', unknown, 99.9, 33),
    ]
    expected = [dict(zip(columns, row)) for row in expected_rows]

    encoder = OneHotEncoder({'animal': 2, 'color': 1}, ['weight', 'height'])
    encoder.load_from_data_stream(records)

    decoded = encoder.decode_data(encoder.encode_data(records))

    assert decoded == expected