def test_mape_with_perfect_prediction(self):
     """Expect a MAPE of exactly 0 when every prediction equals its gap."""
     expected_mape = 0
     actual_gaps = [1, 2, 3]
     predicted_gaps = [1, 2, 3]
     self.assertEqual(
         calculate_mape(actual_gaps, predicted_gaps), expected_mape)
 def test_mape_with_zero_gaps(self):
     """Expect an entry whose gap is 0 to add nothing to the MAPE.

     The middle gap is 0 while its prediction is 2; the expected result is
     still the value produced by the last entry alone (|4 - 1| / 4 = 0.75).
     """
     gaps = [1, 0, 4]
     predictions = [1, 2, 1]
     # 1 / 3784. == 0.75 / 2838.
     # NOTE(review): 2838 is presumably a fixed normaliser applied inside
     # calculate_mape -- confirm against utilities.measures.
     expected = 1 / 3784.
     # Compare floats with a tolerance instead of bit-for-bit equality, so
     # the test does not depend on the exact order of float operations.
     self.assertAlmostEqual(
         calculate_mape(gaps, predictions), expected, places=12)
 def test_mape_with_another_one_off(self):
     """Expect the MAPE for a single wrong entry (gap 4 predicted as 1).

     The first two predictions match their gaps, so only the last entry
     contributes: |4 - 1| / 4 = 0.75.
     """
     gaps = [1, 2, 4]
     predictions = [1, 2, 1]
     # 1 / 3784. == 0.75 / 2838.
     # NOTE(review): 2838 is presumably a fixed normaliser applied inside
     # calculate_mape -- confirm against utilities.measures.
     expected = 1 / 3784.
     # Compare floats with a tolerance instead of bit-for-bit equality, so
     # the test does not depend on the exact order of float operations.
     self.assertAlmostEqual(
         calculate_mape(gaps, predictions), expected, places=12)
 def test_mape_with_one_off(self):
     """Expect the MAPE for a single wrong entry (gap 3 predicted as 4).

     The first two predictions match their gaps, so only the last entry
     contributes: |3 - 4| / 3 = 1/3.
     """
     gaps = [1, 2, 3]
     predictions = [1, 2, 4]
     # 1 / 8514. == (1 / 3.) / 2838.
     # NOTE(review): 2838 is presumably a fixed normaliser applied inside
     # calculate_mape -- confirm against utilities.measures.
     expected = 1 / 8514.
     # 1/3 is not exactly representable, so dividing it again may round
     # differently from 1 / 8514.; compare with a tolerance, not exactly.
     self.assertAlmostEqual(
         calculate_mape(gaps, predictions), expected, places=12)
# Example #5
# 0
from sklearn import svm
from data_preprocessing.get_x_y_from_features import prediction_matching_data_split
from utilities.measures import calculate_mape


def train_predict():
    """Fit an SVR built with default arguments and predict the test set.

    Reads the module-level names ``x_train``, ``y_train`` and ``x_test``,
    so they must be assigned before this function is called.

    Returns:
        The result of ``predict`` on ``x_test`` from the fitted regressor.
    """
    regressor = svm.SVR()
    regressor.fit(x_train, y_train)
    test_predictions = regressor.predict(x_test)
    return test_predictions


# Baseline run: SVR with its default arguments.
# The split has to be bound to these module-level names first, because
# train_predict() reads x_train, y_train and x_test as globals.
x_train, x_test, y_train, y_test = prediction_matching_data_split()
predictions = train_predict()
# Report the MAPE of the test-set predictions.
print(calculate_mape(y_test, predictions))
            training_error[i] = metrics.mean_absolute_error(
                y_train[:s], dtr.predict(x_train[:s]))
            test_error[i] = metrics.mean_absolute_error(
                y_test, dtr.predict(x_test))
        learning_curve_graph(sizes, training_error, test_error)


# use grid search to find the best depth
def find_best_depth(max_depths):
    """Grid-search ``max_depth`` for a DecisionTreeRegressor.

    Candidates are scored with ``calculate_mape`` wrapped as a scorer with
    ``greater_is_better=False`` (lower values win), using ``cv=30`` and
    ``n_jobs=-1``. The search is fitted on the module-level ``x_train`` /
    ``y_train`` and then used to predict the module-level ``x_test``.

    Args:
        max_depths: candidate values for the tree's ``max_depth``.

    Returns:
        A dict with two keys: ``'prediction'`` (the fitted search's
        predictions for ``x_test``) and ``'best_depth'`` (the selected
        ``max_depth``).
    """
    mape_scorer = metrics.make_scorer(calculate_mape, greater_is_better=False)
    param_grid = {'max_depth': max_depths}
    search = grid_search.GridSearchCV(
        tree.DecisionTreeRegressor(),
        param_grid,
        scoring=mape_scorer,
        n_jobs=-1,
        cv=30)
    search.fit(x_train, y_train)

    test_prediction = search.predict(x_test)
    chosen_depth = search.best_params_['max_depth']
    return {
        'prediction': test_prediction,
        'best_depth': chosen_depth
    }


# split data into 75/25 percent sizes
x_train, x_test, y_train, y_test = prediction_matching_data_split()
# use grid search to get prediction results
gs_result = find_best_depth([d for d in range(10, 30)])
print 'best depth', gs_result['best_depth']
predictions = gs_result['prediction']
# use MAPE calculator to find the model performance
print 'MAPE: ', calculate_mape(y_test, predictions)