def setUp(self):
    """Prepare the data split and the models shared by the test methods.

    The train/test frames and the class list are stored on the instance.
    KNN, NaiveBayes and Linear are fitted on the training frame; W2V is
    only constructed, it is not fitted here.
    """
    train_df, test_df = get_train_test_split()
    self.train_df = train_df
    self.test_df = test_df

    labels = constants["classes"]
    self.classes = labels

    # Each model is published on the instance before it is fitted, which
    # keeps the attribute-assignment order of the fixture stable.
    knn = KNN(k=4, classes=labels)
    self.KNN = knn
    knn.fit(train_df)

    bayes = NaiveBayes(n=3, classes=labels)
    self.NaiveBayes = bayes
    bayes.fit(train_df)

    linear = Linear(classes=labels, max_len=40)
    self.Linear = linear
    linear.fit(train_df, epochs=1)

    self.W2V = W2V(classes=labels)
def get_model(format, optimised=True) -> AbstractModel:
    """Instantiate the model wrapper registered under ``format``.

    Args:
        format: Name of the model to build, e.g. ``'SVM'`` or ``'MLP'``.
        optimised: Passed through as the single positional argument of the
            selected model's constructor.

    Returns:
        A new instance of the selected model class.

    Raises:
        ValueError: If ``format`` matches none of the known names; the
            offending value is the exception's only argument.
    """
    # Every entry is a zero-argument factory so that a model class name is
    # only looked up when its entry is actually selected.
    factories = (
        ('LogisticRegression', lambda: LogisticRegressionModel(optimised)),
        ('RandomForest', lambda: RandomForestModel(optimised)),
        ('NaiveBayes', lambda: NaiveBayes(optimised)),
        ('GradientBoosting', lambda: GradientBoosting(optimised)),
        ('SVM', lambda: SVM(optimised)),
        ('OneClassSVM', lambda: OneClassSVMModel(optimised)),
        ('DecisionTree', lambda: DecisionTree(optimised)),
        ('AdaBoost', lambda: AdaBoost(optimised)),
        ('GaussianProcess', lambda: GaussianProcess(optimised)),
        ('MLP', lambda: MLP(optimised)),
        ('KNeighbors', lambda: KNeighbors(optimised)),
        ('QuadraticDiscriminant', lambda: QuadraticDiscriminant(optimised)),
        ('Dummy', lambda: Dummy(optimised)),
    )
    for label, build in factories:
        if format == label:
            return build()
    raise ValueError(format)
############################################ ###### Part II ########### ############################################ svm = SVM(verbose=True) svm.train(epochs=20) hm.report(svm) hm.evaluate(svm) lr = LogisticRegression(verbose=True) lr.train(epochs=20) hm.report(lr) hm.evaluate(lr) nb = NaiveBayes() nb.train(epochs=1) hm.report(nb) hm.evaluate(nb) # Logistic regression using sklearn import data as dt from sklearn.linear_model import LogisticRegression train_data = dt.load_data(dt.TRAIN, matrix=True) test_data = dt.load_data(dt.TEST, matrix=True) lr = LogisticRegression() lr.fit(X=train_data[:, 1:], y=train_data[:, 0]) print('************************************************')
def walk_forward_cv(self):
    """Run walk-forward cross-validation for every output and every model.

    For each name in ``self.output_names`` a fresh KNN, ElasticNet,
    NaiveBayes, SupportVectorMachine, GaussianProcess and XGBoost object is
    configured from this instance's attributes and its ``run_*_cv`` method
    is called. The optimal parameters, metadata and predictions read back
    from the models are stored per model name in
    ``self.optimal_params_by_output``, ``self.cv_metadata_by_output`` and
    ``self.cv_predictions_by_output`` under the output name. KNN and
    Naive Bayes contribute no metadata entry.
    """

    def configure(model, output_name, with_feature_dict):
        # Copy the shared cross-validation inputs onto ``model``. Only some
        # models are given ``feature_dict``.
        model.cv_params = self.cv_params
        model.test_name = self.test_name
        model.full_df = self.full_df
        model.feature_names = self.feature_names
        if with_feature_dict:
            model.feature_dict = self.feature_dict
        model.output_name = output_name
        return model

    for output_name in self.output_names:
        print('\t\t\t|--Prediction type: {}'.format(output_name))
        best_params = {}
        metadata = {}
        predictions = {}

        print('\t\t\t\t|--KNN Model')
        knn = configure(KNN(), output_name, with_feature_dict=False)
        knn.run_knn_cv()
        best_params['KNN'] = knn.knn_optimal_params
        predictions['KNN'] = knn.knn_cv_predictions

        print('\t\t\t\t|--Elastic Net Model')
        elastic_net = configure(
            ElasticNet(), output_name, with_feature_dict=True)
        elastic_net.run_elastic_net_cv()
        best_params['Elastic_Net'] = elastic_net.elastic_net_optimal_params
        metadata['Elastic_Net'] = elastic_net.metadata
        predictions['Elastic_Net'] = elastic_net.elastic_net_cv_predictions

        print('\t\t\t\t|--Naive Bayes Model')
        naive_bayes = configure(
            NaiveBayes(), output_name, with_feature_dict=True)
        naive_bayes.run_bayes_cv()
        predictions['Naive_Bayes'] = naive_bayes.bayes_cv_predictions
        best_params['Naive_Bayes'] = naive_bayes.bayes_optimal_params

        print('\t\t\t\t|--SVM Model')
        svm = configure(
            SupportVectorMachine(), output_name, with_feature_dict=False)
        svm.run_svm_cv()
        best_params['SVM'] = svm.svm_optimal_params
        metadata['SVM'] = svm.metadata
        predictions['SVM'] = svm.svm_cv_predictions

        print('\t\t\t\t|--Gaussian Process Model')
        gauss = configure(
            GaussianProcess(), output_name, with_feature_dict=True)
        gauss.run_gauss_cv()
        predictions['Gaussian_Process'] = gauss.gauss_cv_predictions
        metadata['Gaussian_Process'] = gauss.metadata
        best_params['Gaussian_Process'] = gauss.gauss_optimal_params

        print('\t\t\t\t|--XGBoost Model')
        xgboost = configure(XGBoost(), output_name, with_feature_dict=True)
        xgboost.run_xgboost_cv()
        best_params['XGBoost'] = xgboost.xgboost_optimal_params
        metadata['XGBoost'] = xgboost.metadata
        predictions['XGBoost'] = xgboost.xgboost_cv_predictions

        # Publish the per-model results for this output.
        self.optimal_params_by_output[output_name] = best_params
        self.cv_metadata_by_output[output_name] = metadata
        self.cv_predictions_by_output[output_name] = predictions
def walk_forward_prediction(self):
    """Run walk-forward prediction for every output and every model.

    For each name in ``self.output_names`` a fresh KNN, ElasticNet,
    NaiveBayes, SupportVectorMachine, GaussianProcess and XGBoost object is
    configured from this instance's attributes and its ``run_*_prediction``
    method is called. A WeightedAverage object is then run on the collected
    per-model predictions. Prediction errors, predictions and metadata are
    stored per model name in ``self.prediction_errors_by_output``,
    ``self.predictions_by_output`` and ``self.pred_metadata_by_output``
    under the output name. KNN and Naive Bayes contribute no metadata entry
    and the weighted average contributes no error entry.
    """

    def configure(model, output_name, with_feature_dict):
        # Copy the shared prediction inputs onto ``model``. Only some
        # models are given ``feature_dict``.
        model.pred_indices = self.pred_indices
        model.full_df = self.full_df
        model.feature_names = self.feature_names
        if with_feature_dict:
            model.feature_dict = self.feature_dict
        model.output_name = output_name
        return model

    for output_name in self.output_names:
        print('\t\t\t|--Prediction type: {}'.format(output_name))
        errors = {}
        predictions = {}
        metadata = {}

        print('\t\t\t\t|--KNN Model')
        knn = configure(KNN(), output_name, with_feature_dict=False)
        knn.knn_optimal_params = (
            self.optimal_params_by_output[output_name]['KNN'])
        knn.run_knn_prediction()
        errors['KNN'] = knn.knn_pred_error
        predictions['KNN'] = knn.knn_predictions

        print('\t\t\t\t|--Elastic Net Model')
        elastic_net = configure(
            ElasticNet(), output_name, with_feature_dict=True)
        elastic_net.elastic_net_optimal_params = (
            self.optimal_params_by_output[output_name]['Elastic_Net'])
        elastic_net.run_elastic_net_prediction()
        errors['Elastic_Net'] = elastic_net.elastic_net_pred_error
        predictions['Elastic_Net'] = elastic_net.elastic_net_predictions
        metadata['Elastic_Net'] = elastic_net.metadata

        print('\t\t\t\t|--Naive Bayes Model')
        # NOTE(review): unlike KNN, Elastic Net, SVM and XGBoost, no tuned
        # parameters are handed to Naive Bayes here, although
        # walk_forward_cv records a 'Naive_Bayes' entry. Confirm that this
        # is intended.
        naive_bayes = configure(
            NaiveBayes(), output_name, with_feature_dict=False)
        naive_bayes.run_bayes_prediction()
        errors['Naive_Bayes'] = naive_bayes.bayes_pred_error
        predictions['Naive_Bayes'] = naive_bayes.bayes_predictions

        print('\t\t\t\t|--SVM Model')
        svm = configure(
            SupportVectorMachine(), output_name, with_feature_dict=False)
        svm.svm_optimal_params = (
            self.optimal_params_by_output[output_name]['SVM'])
        svm.run_svm_prediction()
        errors['SVM'] = svm.svm_pred_error
        predictions['SVM'] = svm.svm_predictions
        metadata['SVM'] = svm.metadata

        print('\t\t\t\t|--Gaussian Process Model')
        # NOTE(review): the Gaussian process also receives no tuned
        # parameters in this method. Confirm that this is intended.
        gauss = configure(
            GaussianProcess(), output_name, with_feature_dict=False)
        gauss.run_gauss_prediction()
        errors['Gaussian_Process'] = gauss.gauss_pred_error
        predictions['Gaussian_Process'] = gauss.gauss_predictions
        metadata['Gaussian_Process'] = gauss.metadata

        print('\t\t\t\t|--XGBoost Model')
        xgboost = configure(XGBoost(), output_name, with_feature_dict=True)
        xgboost.xgboost_optimal_params = (
            self.optimal_params_by_output[output_name]['XGBoost'])
        xgboost.run_xgboost_prediction()
        errors['XGBoost'] = xgboost.xgboost_pred_error
        predictions['XGBoost'] = xgboost.xgboost_predictions
        metadata['XGBoost'] = xgboost.metadata

        print('\t\t\t\t|--Weighted Average Model')
        weighted_average = WeightedAverage()
        weighted_average.model_names = self.model_names
        # The per-model optimal parameters of this output are what gets
        # passed in as ``cv_results``.
        weighted_average.cv_results = (
            self.optimal_params_by_output[output_name])
        weighted_average.predictions_by_model = predictions
        weighted_average.run_weighted_average_prediction()
        predictions['Weighted_Average'] = (
            weighted_average.weighted_average_predictions)
        metadata['Weighted_Average'] = weighted_average.metadata

        # Publish the per-model results for this output.
        self.prediction_errors_by_output[output_name] = errors
        self.predictions_by_output[output_name] = predictions
        self.pred_metadata_by_output[output_name] = metadata
class ModelTests(unittest.TestCase): def setUp(self): self.train_df, self.test_df = get_train_test_split() self.classes = constants["classes"] self.KNN = KNN(k=4, classes=self.classes) self.KNN.fit(self.train_df) self.NaiveBayes = NaiveBayes(n=3, classes=self.classes) self.NaiveBayes.fit(self.train_df) self.Linear = Linear(classes=self.classes, max_len=40) self.Linear.fit(self.train_df, epochs=1) self.W2V = W2V(classes=self.classes) def test_knn_io(self): """ Test that KNN model takes the right inputs and outputs a dictionary with all possible class """ pred, output = self.KNN("BREST") self.assertIsInstance(output, dict) self.assertIn(pred, self.classes) for label in self.classes: self.assertIn(label, output.keys()) def test_knn_output_probabilities(self): """ Test that KNN model returns probabilities for each possible class """ _, output = self.KNN("RADE DE BREST") # sums up to one self.assertLess(abs(sum(output.values()) - 1), 1e-3) # all values between 0 and 1 for value in output.values(): self.assertGreaterEqual(value, 0) self.assertLessEqual(value, 1) def test_knn_case_unsensitive(self): pred_upper, output_upper = self.KNN("BREST") pred_lower, output_lower = self.KNN("brest") self.assertEqual(pred_upper, pred_lower) self.assertListEqual(list(output_upper.items()), list(output_lower.items())) def test_naive_bayes_io(self): """ Test that Naive Bayes model takes the right inputs and outputs a dictionary with all possible class """ pred, output = self.NaiveBayes("BREST") self.assertIn(pred, self.classes) self.assertIsInstance(output, dict) # def test_naive_bayes_output_probabilities(self): # _, output = self.NaiveBayes("BREST") # self.assertLess(abs(sum(output.values()) - 1), 1e-3) # for label in self.classes: # self.assertIn(label, output.keys()) def test_linear_io(self): """ Test that Linear model takes the right inputs and outputs a dictionary with all possible class """ pred, output = self.Linear("BREST") self.assertIn(pred, self.classes) 
self.assertIsInstance(output, dict) def test_linear_output_probabilities(self): _, output = self.Linear("BREST") self.assertLess(abs(sum(output.values()) - 1), 1e-3) for label in self.classes: self.assertIn(label, output.keys()) def test_w2v_io(self): """ Test that Word2Vec model takes the right inputs and outputs a dictionary with all possible class """ pred, output = self.W2V("BREST") self.assertIn(pred, self.classes) self.assertIsInstance(output, dict) def test_w2v_output_probabilities(self): _, output = self.W2V("BREST") self.assertLess(abs(sum(output.values()) - 1), 1e-3) for label in self.classes: self.assertIn(label, output.keys())