Example #1
0
def run_example(stats=False):
    """Round-trip a fixed sample through ``OneHotEncoder`` and return its package.

    The sample rows are loaded into the encoder, encoded, decoded again, and
    the decoded rows are asserted equal to a hard-coded expectation. In that
    expectation the ``extra_junk`` key is gone and several categorical values
    are replaced by ``'UNKNOWN_CATEGORICAL_LEVEL'``.

    Args:
        stats: When truthy, ``encoder.add_numeric_stats`` is called with the
            sample rows before packaging.

    Returns:
        Whatever ``encoder.package_data()`` returns.

    Raises:
        AssertionError: If the decoded rows differ from the expectation.
    """
    unknown = 'UNKNOWN_CATEGORICAL_LEVEL'

    records = [
        {'animal': 'cat', 'color': 'blue', 'weight': 6.0, 'height': 88.9, 'extra_junk': 'blah'},
        {'animal': 'cat', 'color': 'red', 'weight': 3.0, 'height': 44.9},
        {'animal': 'dog', 'color': 'yellow', 'weight': 5.5, 'height': 2.5},
        {'animal': 'fish', 'color': 'blue', 'weight': 7.0, 'height': 3233.2},
        {'animal': 'cat', 'color': 'magenta', 'weight': 2.0, 'height': 666.6},
        {'animal': 'mouse', 'color': 'red', 'weight': 0.0, 'height': 55.5},
        {'animal': 'mouse', 'color': 'blah', 'weight': 99.9, 'height': 33},
    ]

    # Decoded rows this example expects back, one per input row, same order.
    expected_rows = [
        {'height': 88.9, 'weight': 6.0, 'animal': 'cat', 'color': 'blue'},
        {'height': 44.9, 'weight': 3.0, 'animal': 'cat', 'color': unknown},
        {'height': 2.5, 'weight': 5.5, 'color': unknown, 'animal': unknown},
        {'height': 3233.2, 'weight': 7.0, 'color': 'blue', 'animal': unknown},
        {'height': 666.6, 'weight': 2.0, 'animal': 'cat', 'color': unknown},
        {'height': 55.5, 'weight': 0.0, 'animal': 'mouse', 'color': unknown},
        {'height': 33, 'weight': 99.9, 'animal': 'mouse', 'color': unknown},
    ]

    # NOTE(review): the dict presumably caps the number of levels kept per
    # categorical column, and the list names the numeric columns -- confirm
    # against OneHotEncoder's definition.
    encoder = OneHotEncoder({'animal': 2, 'color': 1}, ['weight', 'height'])
    encoder.load_from_data_stream(records)

    round_tripped = encoder.decode_data(encoder.encode_data(records))
    assert round_tripped == expected_rows

    # Numeric stats are optional and are added only after the round trip.
    if stats:
        encoder.add_numeric_stats(records)

    return encoder.package_data()
def get_round_trip_decoded(stats=False, omit_cols=None):
    """Encode and decode a fixed sample, returning the decoded rows and package.

    Builds a ``OneHotEncoder`` (forwarding ``omit_cols`` to it), loads the
    sample rows, runs them through ``encode_data`` and then ``decode_data``,
    and finally packages the encoder.

    Args:
        stats: When truthy, ``encoder.add_numeric_stats`` is called with the
            sample rows before packaging.
        omit_cols: Passed straight through as the encoder's ``omit_cols``
            keyword argument.

    Returns:
        A ``(data_decoded, packaged)`` tuple: the result of
        ``encoder.decode_data`` and the result of ``encoder.package_data``.
    """
    records = [
        {'animal': 'cat', 'color': 'blue', 'weight': 6.0, 'height': 88.9, 'extra_junk': 'blah'},
        {'animal': 'cat', 'color': 'red', 'weight': 3.0, 'height': 44.9},
        {'animal': 'dog', 'color': 'yellow', 'weight': 5.5, 'height': 2.5},
        {'animal': 'fish', 'color': 'blue', 'weight': 7.0, 'height': 3233.2},
        {'animal': 'cat', 'color': 'magenta', 'weight': 2.0, 'height': 666.6},
        {'animal': 'mouse', 'color': 'red', 'weight': 0.0, 'height': 55.5},
        {'animal': 'mouse', 'color': 'blah', 'weight': 99.9, 'height': 33},
    ]

    encoder = OneHotEncoder(
        {'animal': 2, 'color': 1},
        ['weight', 'height'],
        omit_cols=omit_cols,
    )
    encoder.load_from_data_stream(records)

    # Round trip: encode the sample, then decode the encoded form again.
    round_tripped = encoder.decode_data(encoder.encode_data(records))

    # Numeric stats are optional and are added only after the round trip.
    if stats:
        encoder.add_numeric_stats(records)

    return round_tripped, encoder.package_data()