예제 #1
0
 def test_wrong_params(self):
     """split_dataset must raise ValueError for an oversized validation ratio.

     Both ratio arguments are passed as 0.6; the validation set parameter is
     then too big and the call is expected to be rejected.
     """
     df1 = pd.DataFrame(np.random.randn(10, 5),
                        columns=['a', 'b', 'c', 'd', 'e'])
     # The context-manager form keeps the failing call readable and avoids
     # wrapping it in a lambda.
     with self.assertRaises(ValueError):
         split.split_dataset(df1, 0.6, 0.6)
예제 #2
0
 def test_no_validation_set(self):
     """A validation ratio of 0.0 leaves only a training and a test part.

     With 10 rows and a training ratio of 0.6, 6 rows are expected in the
     training part and the remaining 4 rows in the test part.
     """
     df1 = pd.DataFrame(np.random.randn(10, 5),
                        columns=['a', 'b', 'c', 'd', 'e'])
     train_df, valid_df, test_df = split.split_dataset(df1, 0.6, 0.0)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         len(train_df.index) + len(test_df.index), len(df1.index))
     self.assertEqual(len(train_df.index), 6)
     # NOTE(review): pd.DataFrame.empty accessed on the class is the property
     # object itself, not an empty frame. This assertion can only pass if
     # split_dataset returns that very object for the unused validation
     # part -- confirm against split_dataset before switching to
     # `self.assertTrue(valid_df.empty)`.
     self.assertEqual(valid_df, pd.DataFrame.empty)
     self.assertEqual(len(test_df.index), 4)
예제 #3
0
 def test_real_dataset(self):
     """Split the real trajectories file 70/10/20 and check the part sizes.

     The three parts must add up to the full dataset, and each part must be
     close to its nominal share of the rows.
     """
     # Load the trajectories file. DataFrame.from_csv is deprecated (removed
     # in pandas 1.0); read_csv with parse_dates=True reproduces its default
     # of parsing dates in the index.
     df1 = pd.read_csv(path.trajectories_training_file2,
                       index_col=[0, 1, 2], parse_dates=True)
     train_ratio, valid_ratio = 0.7, 0.1
     train_df, valid_df, test_df = split.split_dataset(
         df1, train_ratio, valid_ratio)
     total = len(df1.index)
     self.assertEqual(
         len(train_df.index) + len(test_df.index) + len(valid_df.index),
         total)
     # It's ok if a part diverges by 1 row from its nominal size, because
     # that is within the margin of rounding. The previous checks had the
     # closing parenthesis in the wrong place and no ratio, so they only
     # verified that each part was non-empty.
     self.assertLessEqual(
         abs(len(train_df.index) - total * train_ratio), 1)
     self.assertLessEqual(
         abs(len(valid_df.index) - total * valid_ratio), 1)
     # The test part is the remainder, so it absorbs the deviation of both
     # other parts and may be off by up to 2 rows.
     self.assertLessEqual(
         abs(len(test_df.index) - total * (1 - train_ratio - valid_ratio)),
         2)
예제 #4
0
import sys

import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.externals import joblib

import src.misc.paths as path
import src.misc.split_train_valid as split
import src.vector_gen.generateCurrentSituationVector as vecX
import src.vector_gen.generate_VectorY as vecY
from src.misc import evaluation as eval

# Print arrays in full instead of summarising them. NumPy rejects NaN as a
# threshold (ValueError); sys.maxsize is the documented value for untruncated
# output.
np.set_printoptions(threshold=sys.maxsize)

df = pd.read_csv(path.trajectories_training_file2)

# Split with the project's own helper using its default ratios. An earlier
# revision used sklearn's train_test_split with test_size=0.2 instead.
# The validation part is not used by this script.
training, validation, testing = split.split_dataset(df)

# Build feature (X) and target (Y) frames for the training part ...
X_train = vecX.generate_x_df(training)
Y_train = vecY.generate_VectorY_df(training)

# ... and for the held-out test part.
X_test = vecX.generate_x_df(testing)
Y_test = vecY.generate_VectorY_df(testing)

# Fit a multi-task elastic net with scikit-learn's default hyperparameters.
clf = linear_model.MultiTaskElasticNet()
clf.fit(X_train, Y_train)

Y_pred = clf.predict(X_test)

# NOTE: `eval` here is the project's evaluation module (it shadows the
# builtin); mape is presumably the mean absolute percentage error -- confirm
# in src.misc.evaluation.
error = eval.mape(Y_pred, Y_test)

# Report the raw error value(s) and their overall mean.
print(error)
print(np.mean(np.array(error)))