def test_padding_paramterised_transformer():
    """Check the tabular width after padding with ``pad_length=40``.

    The JapaneseVowels training split is padded with
    ``PaddingTransformer(pad_length=40)`` and tabularized; the resulting
    frame is expected to have 40 columns for each of the 12 dimensions.
    """
    # Only the training split is exercised, so the test split is not loaded.
    X_train, _ = _load_dataset(
        'JapaneseVowels', split='train', return_X_y=True
    )

    padding_transformer = PaddingTransformer(pad_length=40)
    Xt = padding_transformer.fit_transform(X_train)

    # When we tabularize the data it has 12 dimensions, each of which has
    # been padded to a length of 40.
    data = tabularize(Xt)
    assert len(data.columns) == 40 * 12
def test_padding_transformer():
    """Check the tabular width after padding with default settings.

    A ``PaddingTransformer`` built with no arguments is fitted on the
    JapaneseVowels training split; the tabularized output is expected to
    have 29 columns for each of the 12 dimensions.
    """
    dataset = 'JapaneseVowels'
    train_X, _train_y = _load_dataset(
        dataset, split='train', return_X_y=True
    )
    # The test split is loaded as well, although nothing below reads it.
    _test_X, _test_y = _load_dataset(
        dataset, split='test', return_X_y=True
    )

    padded = PaddingTransformer().fit_transform(train_X)

    # 12 dimensions, each padded to a length of 29.
    n_columns = len(tabularize(padded).columns)
    assert n_columns == 29 * 12
def test_truncation_transformer():
    """Check the tabular width after truncating with ``TruncationTransformer(5)``.

    The transformer is fitted on the JapaneseVowels training split; the
    tabularized output is expected to have 5 columns for each of the 12
    dimensions.
    """
    dataset = 'JapaneseVowels'
    train_X, _train_y = _load_dataset(
        dataset, split='train', return_X_y=True
    )
    # The test split is loaded as well, although nothing below reads it.
    _test_X, _test_y = _load_dataset(
        dataset, split='test', return_X_y=True
    )

    truncated = TruncationTransformer(5).fit_transform(train_X)

    # 12 dimensions, each truncated to a length of 5.
    n_columns = len(tabularize(truncated).columns)
    assert n_columns == 5 * 12
def test_truncation_paramterised_transformer():
    """Check the tabular width after ``TruncationTransformer(2, 10)``.

    The transformer is fitted on the JapaneseVowels training split; the
    2D output is expected to have 8 (that is, 10 - 2) columns for each of
    the 12 dimensions.
    """
    dataset = "JapaneseVowels"
    train_X, _train_y = _load_dataset(
        dataset, split="train", return_X_y=True
    )
    # The test split is loaded as well, although nothing below reads it.
    _test_X, _test_y = _load_dataset(
        dataset, split="test", return_X_y=True
    )

    truncated = TruncationTransformer(2, 10).fit_transform(train_X)

    # 12 dimensions, each truncated to (10 - 2) values.
    n_columns = len(from_nested_to_2d_array(truncated).columns)
    assert n_columns == 8 * 12
def test_padding_fill_value_transformer():
    """Check the tabular width after padding with an explicit fill value.

    The JapaneseVowels training split is padded with
    ``PaddingTransformer(pad_length=40, fill_value=1)`` and converted to a
    2D frame, which is expected to have 40 columns for each of the 12
    dimensions.
    """
    # Only the training split is exercised, so the test split is not loaded.
    X_train, _ = _load_dataset(
        "JapaneseVowels", split="train", return_X_y=True
    )

    padding_transformer = PaddingTransformer(pad_length=40, fill_value=1)
    Xt = padding_transformer.fit_transform(X_train)

    # When we tabularize the data it has 12 dimensions, each of which has
    # been padded to a length of 40.
    # NOTE(review): only the width is asserted; the padded cells are not
    # checked against ``fill_value``.
    data = from_nested_to_2d_array(Xt)
    assert len(data.columns) == 40 * 12
def test():
    """Rescale every PLAID series against the longest one and plot them.

    Loads the PLAID dataset as a single frame, scales each series against
    the longest series in the data with ``us``, collects the scaled series
    and their class values into a new frame, and passes the raw and scaled
    series to ``make_graphs``.
    """
    matplotlib.use('TkAgg')

    df = _load_dataset("PLAID", None, False)
    # Column labels are reused for the rebuilt frame below.
    cols = df.columns

    # Each row carries the series at position 0 and its class value at
    # position 1.
    rows = df.values.tolist()
    querydata = [row[0].tolist() for row in rows]
    classes = [row[1] for row in rows]

    # The longest series is the candidate every series is scaled against;
    # on ties the first one wins, and empty data yields an empty candidate.
    candidate = max(querydata, key=len, default=[])

    scaleddata = [us(series, candidate) for series in querydata]

    # Build the frame in a single call. DataFrame.append returns a new frame
    # rather than modifying in place (and was removed in pandas 2.0), so
    # appending row by row and discarding the result left the frame empty.
    # Only the first two labels are used because each rebuilt row has exactly
    # two cells: the scaled series and its class value.
    # NOTE(review): ``result`` is not consumed further down; confirm whether
    # it should be checked or handed to ``make_graphs``.
    result = pd.DataFrame(  # noqa: F841
        list(zip(scaleddata, classes)), columns=cols[:2]
    )

    make_graphs(querydata, scaleddata)