def test_wrong_params(self):
    """Check that split_dataset raises ValueError for the fractions (0.6, 0.6)."""
    column_names = ['a', 'b', 'c', 'd', 'e']
    frame = pd.DataFrame(np.random.randn(10, 5), columns=column_names)

    # Validation set has a parameter that is too big
    # (presumably because 0.6 + 0.6 exceeds the whole dataset -- confirm
    # against split_dataset's own validation).
    with self.assertRaises(ValueError):
        split.split_dataset(frame, 0.6, 0.6)
def test_no_validation_set(self):
    """Split a 10-row frame with fractions (0.6, 0.0) and check the sizes.

    With the third argument set to 0.0 the train and test parts together
    must cover every row: 6 rows for training and 4 rows for testing.
    """
    df1 = pd.DataFrame(np.random.randn(10, 5),
                       columns=['a', 'b', 'c', 'd', 'e'])
    train_df, valid_df, test_df = split.split_dataset(df1, 0.6, 0.0)

    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists in recent Python versions.
    self.assertEqual(len(train_df.index) + len(test_df.index),
                     len(df1.index))
    self.assertEqual(len(train_df.index), 6)
    # NOTE(review): pd.DataFrame.empty accessed on the class is the property
    # object itself, not an empty frame. This comparison is kept as written;
    # it can only succeed if split_dataset returns that same object when no
    # validation set is requested -- confirm against split_dataset, and
    # consider asserting `valid_df.empty` if it returns a real DataFrame.
    self.assertEqual(valid_df, pd.DataFrame.empty)
    self.assertEqual(len(test_df.index), 4)
def test_real_dataset(self):
    """Split the real trajectories file 0.7 / 0.1 and check the part sizes.

    The three parts must cover every row of the input, and each part may
    differ from its expected share (70 % train, 10 % validation, remaining
    20 % test) by at most one row.
    """
    # Load the trajectories file, using its first three columns as index.
    # pd.read_csv(..., parse_dates=True) is the documented replacement for
    # the deprecated pd.DataFrame.from_csv and keeps its date parsing.
    df1 = pd.read_csv(path.trajectories_training_file2,
                      index_col=[0, 1, 2], parse_dates=True)
    train_df, valid_df, test_df = split.split_dataset(df1, 0.7, 0.1)

    total_rows = len(df1.index)
    self.assertEqual(
        len(train_df.index) + len(test_df.index) + len(valid_df.index),
        total_rows)

    # It's ok if it diverges by 1, because that is in the margin of
    # randomness. The original checks were `assertTrue(size - int(n <= 1))`,
    # which is truthy for practically any size and so verified nothing.
    expected_shares = (
        ('train', train_df, 0.7),
        ('valid', valid_df, 0.1),
        ('test', test_df, 0.2),
    )
    for label, part, share in expected_shares:
        expected_rows = int(total_rows * share)
        self.assertLessEqual(
            abs(len(part.index) - expected_rows), 1,
            '%s part has %d rows, expected about %d'
            % (label, len(part.index), expected_rows))
# Baseline script: fit a MultiTaskElasticNet on the training part of the
# trajectories data and print its MAPE on the testing part.
import sys

import numpy as np
import pandas as pd
from sklearn import linear_model
# NOTE(review): sklearn.externals.joblib is not used in the lines visible
# here and is no longer shipped by recent scikit-learn releases; kept because
# other parts of the file may rely on it.
from sklearn.externals import joblib

import src.misc.paths as path
import src.misc.split_train_valid as split
import src.vector_gen.generateCurrentSituationVector as vecX
import src.vector_gen.generate_VectorY as vecY
# NOTE(review): this alias shadows the builtin eval(); left unchanged so that
# existing references to `eval.` keep working.
from src.misc import evaluation as eval

# Print arrays in full. threshold=np.nan is rejected with a ValueError by
# current NumPy; sys.maxsize is the documented way to disable summarization.
np.set_printoptions(threshold=sys.maxsize)

df = pd.read_csv(path.trajectories_training_file2)

# The project's own split (with its default fractions) is used here rather
# than sklearn's train_test_split; the validation part is not used below.
training, validation, testing = split.split_dataset(df)

# Build the feature and target frames for the training and testing parts.
X_train = vecX.generate_x_df(training)
Y_train = vecY.generate_VectorY_df(training)

X_test = vecX.generate_x_df(testing)
Y_test = vecY.generate_VectorY_df(testing)

clf = linear_model.MultiTaskElasticNet()
clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)

# Report the error values returned by mape, then their mean.
error = eval.mape(Y_pred, Y_test)
print(error)
print(np.mean(np.array(error)))