Esempio n. 1
0
def test_padding_paramterised_transformer():
    """Check that an explicit ``pad_length`` sets the padded series length.

    Fits ``PaddingTransformer(pad_length=40)`` on the JapaneseVowels training
    split, flattens the result with ``tabularize`` and asserts that the
    resulting table has ``40 * 12`` columns.
    """
    # Only the training data is exercised; labels are not needed here.
    name = 'JapaneseVowels'
    X_train, _ = _load_dataset(name, split='train', return_X_y=True)

    padding_transformer = PaddingTransformer(pad_length=40)
    Xt = padding_transformer.fit_transform(X_train)

    # When we tabularize the data it has 12 dimensions, and every series
    # has been padded to the requested length of 40.
    data = tabularize(Xt)
    assert len(data.columns) == 40 * 12
Esempio n. 2
0
def test_padding_transformer():
    """Default ``PaddingTransformer`` should yield ``29 * 12`` flat columns."""
    dataset = 'JapaneseVowels'
    X_train, y_train = _load_dataset(dataset, split='train', return_X_y=True)
    X_test, y_test = _load_dataset(dataset, split='test', return_X_y=True)

    padded = PaddingTransformer().fit_transform(X_train)

    # Tabularized, the data has 12 dimensions, each padded to the
    # normal series length of 29.
    n_dimensions, padded_length = 12, 29
    flat = tabularize(padded)
    assert len(flat.columns) == padded_length * n_dimensions
Esempio n. 3
0
def test_truncation_transformer():
    """``TruncationTransformer(5)`` should yield ``5 * 12`` flat columns."""
    dataset = 'JapaneseVowels'
    X_train, y_train = _load_dataset(dataset, split='train', return_X_y=True)
    X_test, y_test = _load_dataset(dataset, split='test', return_X_y=True)

    truncated = TruncationTransformer(5).fit_transform(X_train)

    # Tabularized, the data has 12 dimensions, each truncated to 5 values.
    n_dimensions, truncated_length = 12, 5
    flat = tabularize(truncated)
    assert len(flat.columns) == truncated_length * n_dimensions
Esempio n. 4
0
def test_truncation_paramterised_transformer():
    """``TruncationTransformer(2, 10)`` should yield ``8 * 12`` flat columns."""
    dataset = "JapaneseVowels"
    X_train, y_train = _load_dataset(dataset, split="train", return_X_y=True)
    X_test, y_test = _load_dataset(dataset, split="test", return_X_y=True)

    truncated = TruncationTransformer(2, 10).fit_transform(X_train)

    # Tabularized, the data has 12 dimensions, each truncated to
    # (10 - 2) = 8 values.
    n_dimensions, truncated_length = 12, 8
    flat = from_nested_to_2d_array(truncated)
    assert len(flat.columns) == truncated_length * n_dimensions
Esempio n. 5
0
def test_padding_fill_value_transformer():
    """Check padding to an explicit length with a custom ``fill_value``.

    Fits ``PaddingTransformer(pad_length=40, fill_value=1)`` on the
    JapaneseVowels training split, flattens the result with
    ``from_nested_to_2d_array`` and asserts that the resulting table has
    ``40 * 12`` columns. Only the shape is checked, not the filled values.
    """
    # Only the training data is exercised; labels are not needed here.
    name = "JapaneseVowels"
    X_train, _ = _load_dataset(name, split="train", return_X_y=True)

    padding_transformer = PaddingTransformer(pad_length=40, fill_value=1)
    Xt = padding_transformer.fit_transform(X_train)

    # When we tabularize the data it has 12 dimensions, and every series
    # has been padded to the requested length of 40.
    data = from_nested_to_2d_array(Xt)
    assert len(data.columns) == 40 * 12
Esempio n. 6
0
def test():
    """Rescale every PLAID series against the longest one and plot them.

    Loads the PLAID dataset as a single frame, takes the series (position 0)
    and the class value (position 1) from each row, passes every series
    together with the longest series to ``us``, rebuilds a frame with the
    original column labels from the scaled series, and finally hands the
    original and scaled series to ``make_graphs``.
    """
    matplotlib.use('TkAgg')
    df = _load_dataset("PLAID", None, False)
    # Keep the column labels so the rebuilt frame uses the same layout.
    cols = df.columns

    rows = df.values.tolist()
    # Convert each series to a plain list and collect the class values
    # separately for use when rebuilding the frame.
    querydata = [row[0].tolist() for row in rows]
    classes = [row[1] for row in rows]

    # The longest series is the scaling candidate; on ties the earliest
    # one wins, and an empty dataset falls back to an empty candidate.
    candidate = max(querydata, key=len, default=[])

    # Scale our data.
    scaleddata = [us(series, candidate) for series in querydata]

    # Create the new dataframe from our scaled elements. The rows are
    # collected first and the frame is built in one go: DataFrame.append
    # returned a new frame instead of modifying ``result`` (so the frame
    # stayed empty) and has been removed from pandas 2.x.
    records = [
        {cols[0]: series, cols[1]: classval}
        for series, classval in zip(scaleddata, classes)
    ]
    # NOTE(review): ``result`` is not consumed after construction; it is
    # kept because the original built it - confirm whether it should be
    # returned or passed on.
    result = pd.DataFrame(records, columns=cols)  # noqa: F841

    make_graphs(querydata, scaleddata)