Example 1
    def setUp(self):
        self.train_df, self.test_df = get_train_test_split()
        self.classes = constants["classes"]

        self.KNN = KNN(k=4, classes=self.classes)
        self.KNN.fit(self.train_df)

        self.NaiveBayes = NaiveBayes(n=3, classes=self.classes)
        self.NaiveBayes.fit(self.train_df)

        self.Linear = Linear(classes=self.classes, max_len=40)
        self.Linear.fit(self.train_df, epochs=1)

        self.W2V = W2V(classes=self.classes)
Example 2
def get_model(format, optimised=True) -> AbstractModel:
    """Return the model wrapper matching ``format``, or raise ValueError."""
    if format == 'LogisticRegression':
        return LogisticRegressionModel(optimised)
    if format == 'RandomForest':
        return RandomForestModel(optimised)
    if format == 'NaiveBayes':
        return NaiveBayes(optimised)
    if format == 'GradientBoosting':
        return GradientBoosting(optimised)
    if format == 'SVM':
        return SVM(optimised)
    if format == 'OneClassSVM':
        return OneClassSVMModel(optimised)
    if format == 'DecisionTree':
        return DecisionTree(optimised)
    if format == 'AdaBoost':
        return AdaBoost(optimised)
    if format == 'GaussianProcess':
        return GaussianProcess(optimised)
    if format == 'MLP':
        return MLP(optimised)
    if format == 'KNeighbors':
        return KNeighbors(optimised)
    if format == 'QuadraticDiscriminant':
        return QuadraticDiscriminant(optimised)
    if format == 'Dummy':
        return Dummy(optimised)
    raise ValueError(format)
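
A table-driven lookup is a common alternative to the if-chain above. The sketch below only illustrates the idea, using sklearn estimators as stand-ins for the project's own wrapper classes (whose constructors are not shown here):

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Hypothetical registry: maps a model name to a constructor.
_MODEL_REGISTRY = {
    'LogisticRegression': LogisticRegression,
    'SVM': SVC,
    'Dummy': DummyClassifier,
}

def build_model(name):
    # Same contract as get_model: unknown names raise ValueError.
    try:
        return _MODEL_REGISTRY[name]()
    except KeyError:
        raise ValueError(name)
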
Example 3
    ############################################
    ###### Part II                   ###########
    ############################################

    svm = SVM(verbose=True)
    svm.train(epochs=20)
    hm.report(svm)
    hm.evaluate(svm)

    lr = LogisticRegression(verbose=True)
    lr.train(epochs=20)
    hm.report(lr)
    hm.evaluate(lr)

    nb = NaiveBayes()
    nb.train(epochs=1)
    hm.report(nb)
    hm.evaluate(nb)

    # Logistic regression using sklearn
    import data as dt
    from sklearn.linear_model import LogisticRegression

    train_data = dt.load_data(dt.TRAIN, matrix=True)
    test_data = dt.load_data(dt.TEST, matrix=True)

    lr = LogisticRegression()
    lr.fit(X=train_data[:, 1:], y=train_data[:, 0])

    print('************************************************')
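
The snippet loads test_data but only fits on the training matrix. Assuming the test matrix uses the same layout as the training one (label in column 0, features in the remaining columns), a possible continuation that scores the held-out split is:

    # Assumption: test_data mirrors train_data (column 0 = label, rest = features).
    test_X, test_y = test_data[:, 1:], test_data[:, 0]
    print('Test accuracy:', lr.score(test_X, test_y))
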
Example 4
    def walk_forward_cv(self):
        """
        Runs walk-forward cross-validation, and saves cross-validation
        metrics.
        """
        for output_name in self.output_names:
            print('\t\t\t|--Prediction type: {}'.format(output_name))
            optimal_params_by_model = {}
            cv_metadata_by_model = {}
            cv_predictions_by_model = {}

            print('\t\t\t\t|--KNN Model')
            knn = KNN()
            knn.cv_params = self.cv_params
            knn.test_name = self.test_name
            knn.full_df = self.full_df
            knn.feature_names = self.feature_names
            knn.output_name = output_name
            knn.run_knn_cv()
            optimal_params_by_model['KNN'] = knn.knn_optimal_params
            cv_predictions_by_model['KNN'] = knn.knn_cv_predictions

            print('\t\t\t\t|--Elastic Net Model')
            elastic_net = ElasticNet()
            elastic_net.cv_params = self.cv_params
            elastic_net.test_name = self.test_name
            elastic_net.full_df = self.full_df
            elastic_net.feature_names = self.feature_names
            elastic_net.feature_dict = self.feature_dict
            elastic_net.output_name = output_name
            elastic_net.run_elastic_net_cv()
            optimal_params_by_model[
                'Elastic_Net'] = elastic_net.elastic_net_optimal_params
            cv_metadata_by_model['Elastic_Net'] = elastic_net.metadata
            cv_predictions_by_model[
                'Elastic_Net'] = elastic_net.elastic_net_cv_predictions

            print('\t\t\t\t|--Naive Bayes Model')
            naive_bayes = NaiveBayes()
            naive_bayes.cv_params = self.cv_params
            naive_bayes.test_name = self.test_name
            naive_bayes.full_df = self.full_df
            naive_bayes.feature_names = self.feature_names
            naive_bayes.feature_dict = self.feature_dict
            naive_bayes.output_name = output_name
            naive_bayes.run_bayes_cv()
            cv_predictions_by_model[
                'Naive_Bayes'] = naive_bayes.bayes_cv_predictions
            optimal_params_by_model[
                'Naive_Bayes'] = naive_bayes.bayes_optimal_params

            print('\t\t\t\t|--SVM Model')
            svm = SupportVectorMachine()
            svm.cv_params = self.cv_params
            svm.test_name = self.test_name
            svm.full_df = self.full_df
            svm.feature_names = self.feature_names
            svm.output_name = output_name
            svm.run_svm_cv()
            optimal_params_by_model['SVM'] = svm.svm_optimal_params
            cv_metadata_by_model['SVM'] = svm.metadata
            cv_predictions_by_model['SVM'] = svm.svm_cv_predictions

            print('\t\t\t\t|--Gaussian Process Model')
            gauss = GaussianProcess()
            gauss.cv_params = self.cv_params
            gauss.test_name = self.test_name
            gauss.full_df = self.full_df
            gauss.feature_names = self.feature_names
            gauss.feature_dict = self.feature_dict
            gauss.output_name = output_name
            gauss.run_gauss_cv()
            cv_predictions_by_model[
                'Gaussian_Process'] = gauss.gauss_cv_predictions
            cv_metadata_by_model['Gaussian_Process'] = gauss.metadata
            optimal_params_by_model[
                'Gaussian_Process'] = gauss.gauss_optimal_params

            print('\t\t\t\t|--XGBoost Model')
            xgboost = XGBoost()
            xgboost.cv_params = self.cv_params
            xgboost.test_name = self.test_name
            xgboost.full_df = self.full_df
            xgboost.feature_names = self.feature_names
            xgboost.feature_dict = self.feature_dict
            xgboost.output_name = output_name
            xgboost.run_xgboost_cv()
            optimal_params_by_model['XGBoost'] = xgboost.xgboost_optimal_params
            cv_metadata_by_model['XGBoost'] = xgboost.metadata
            cv_predictions_by_model['XGBoost'] = xgboost.xgboost_cv_predictions

            self.optimal_params_by_output[
                output_name] = optimal_params_by_model
            self.cv_metadata_by_output[output_name] = cv_metadata_by_model
            self.cv_predictions_by_output[
                output_name] = cv_predictions_by_model
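
Every per-model block in walk_forward_cv repeats the same steps: configure the wrapper, run its CV routine, then collect optimal parameters, metadata and predictions. A hypothetical consolidation, assuming each wrapper keeps exposing the attributes used above, could drive those steps from a small spec table plus one helper method on the same class:

CV_SPECS = [
    # (dict key, wrapper class, cv runner, params attr, predictions attr,
    #  needs feature_dict, exposes metadata) -- names mirror the blocks above.
    ('KNN', KNN, 'run_knn_cv',
     'knn_optimal_params', 'knn_cv_predictions', False, False),
    ('Elastic_Net', ElasticNet, 'run_elastic_net_cv',
     'elastic_net_optimal_params', 'elastic_net_cv_predictions', True, True),
    ('XGBoost', XGBoost, 'run_xgboost_cv',
     'xgboost_optimal_params', 'xgboost_cv_predictions', True, True),
]

def run_one_cv(self, spec, output_name):
    """Configure one wrapper, run its CV routine, and return its results."""
    key, cls, runner, params_attr, preds_attr, needs_dict, has_meta = spec
    model = cls()
    model.cv_params = self.cv_params
    model.test_name = self.test_name
    model.full_df = self.full_df
    model.feature_names = self.feature_names
    if needs_dict:
        model.feature_dict = self.feature_dict
    model.output_name = output_name
    getattr(model, runner)()
    metadata = model.metadata if has_meta else None
    return getattr(model, params_attr), getattr(model, preds_attr), metadata
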
Example 5
    def walk_forward_prediction(self):
        """
        Runs walk-forward prediction, and saves prediction metrics.
        """
        for output_name in self.output_names:
            print('\t\t\t|--Prediction type: {}'.format(output_name))
            prediction_errors_by_model = {}
            predictions_by_model = {}
            pred_metadata_by_model = {}

            print('\t\t\t\t|--KNN Model')
            knn = KNN()
            knn.pred_indices = self.pred_indices
            knn.full_df = self.full_df
            knn.feature_names = self.feature_names
            knn.output_name = output_name
            knn.knn_optimal_params = self.optimal_params_by_output[
                output_name]['KNN']
            knn.run_knn_prediction()
            prediction_errors_by_model['KNN'] = knn.knn_pred_error
            predictions_by_model['KNN'] = knn.knn_predictions

            print('\t\t\t\t|--Elastic Net Model')
            elastic_net = ElasticNet()
            elastic_net.pred_indices = self.pred_indices
            elastic_net.full_df = self.full_df
            elastic_net.feature_names = self.feature_names
            elastic_net.feature_dict = self.feature_dict
            elastic_net.output_name = output_name
            elastic_net.elastic_net_optimal_params = self.optimal_params_by_output[
                output_name]['Elastic_Net']
            elastic_net.run_elastic_net_prediction()
            prediction_errors_by_model[
                'Elastic_Net'] = elastic_net.elastic_net_pred_error
            predictions_by_model[
                'Elastic_Net'] = elastic_net.elastic_net_predictions
            pred_metadata_by_model['Elastic_Net'] = elastic_net.metadata

            print('\t\t\t\t|--Naive Bayes Model')
            naive_bayes = NaiveBayes()
            naive_bayes.pred_indices = self.pred_indices
            naive_bayes.full_df = self.full_df
            naive_bayes.feature_names = self.feature_names
            naive_bayes.output_name = output_name
            naive_bayes.run_bayes_prediction()
            prediction_errors_by_model[
                'Naive_Bayes'] = naive_bayes.bayes_pred_error
            predictions_by_model['Naive_Bayes'] = naive_bayes.bayes_predictions

            print('\t\t\t\t|--SVM Model')
            svm = SupportVectorMachine()
            svm.pred_indices = self.pred_indices
            svm.full_df = self.full_df
            svm.feature_names = self.feature_names
            svm.output_name = output_name
            svm.svm_optimal_params = self.optimal_params_by_output[
                output_name]['SVM']
            svm.run_svm_prediction()
            prediction_errors_by_model['SVM'] = svm.svm_pred_error
            predictions_by_model['SVM'] = svm.svm_predictions
            pred_metadata_by_model['SVM'] = svm.metadata

            print('\t\t\t\t|--Gaussian Process Model')
            gauss = GaussianProcess()
            gauss.pred_indices = self.pred_indices
            gauss.full_df = self.full_df
            gauss.feature_names = self.feature_names
            gauss.output_name = output_name
            gauss.run_gauss_prediction()
            prediction_errors_by_model[
                'Gaussian_Process'] = gauss.gauss_pred_error
            predictions_by_model['Gaussian_Process'] = gauss.gauss_predictions
            pred_metadata_by_model['Gaussian_Process'] = gauss.metadata

            print('\t\t\t\t|--XGBoost Model')
            xgboost = XGBoost()
            xgboost.pred_indices = self.pred_indices
            xgboost.full_df = self.full_df
            xgboost.feature_names = self.feature_names
            xgboost.feature_dict = self.feature_dict
            xgboost.output_name = output_name
            xgboost.xgboost_optimal_params = self.optimal_params_by_output[
                output_name]['XGBoost']
            xgboost.run_xgboost_prediction()
            prediction_errors_by_model['XGBoost'] = xgboost.xgboost_pred_error
            predictions_by_model['XGBoost'] = xgboost.xgboost_predictions
            pred_metadata_by_model['XGBoost'] = xgboost.metadata

            print('\t\t\t\t|--Weighted Average Model')
            weighted_average = WeightedAverage()
            weighted_average.model_names = self.model_names
            weighted_average.cv_results = self.optimal_params_by_output[
                output_name]
            weighted_average.predictions_by_model = predictions_by_model
            weighted_average.run_weighted_average_prediction()
            predictions_by_model[
                'Weighted_Average'] = weighted_average.weighted_average_predictions
            pred_metadata_by_model[
                'Weighted_Average'] = weighted_average.metadata

            self.prediction_errors_by_output[
                output_name] = prediction_errors_by_model
            self.predictions_by_output[output_name] = predictions_by_model
            self.pred_metadata_by_output[output_name] = pred_metadata_by_model
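
The WeightedAverage step blends the per-model predictions using the CV results, but its internals are not shown here. The sketch below assumes one common convention, weights proportional to inverse CV error, which may differ from what the class actually does:

import numpy as np

def inverse_error_average(predictions_by_model, cv_error_by_model):
    # Assumed convention: lower-error models receive larger weights.
    names = list(predictions_by_model)
    weights = np.array([1.0 / max(cv_error_by_model[n], 1e-12) for n in names])
    weights /= weights.sum()
    stacked = np.stack([np.asarray(predictions_by_model[n], dtype=float)
                        for n in names])
    # Weighted sum across models, one blended prediction per time step.
    return (weights[:, None] * stacked).sum(axis=0)
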
Example 6
class ModelTests(unittest.TestCase):
    def setUp(self):
        self.train_df, self.test_df = get_train_test_split()
        self.classes = constants["classes"]

        self.KNN = KNN(k=4, classes=self.classes)
        self.KNN.fit(self.train_df)

        self.NaiveBayes = NaiveBayes(n=3, classes=self.classes)
        self.NaiveBayes.fit(self.train_df)

        self.Linear = Linear(classes=self.classes, max_len=40)
        self.Linear.fit(self.train_df, epochs=1)

        self.W2V = W2V(classes=self.classes)

    def test_knn_io(self):
        """
        Test that the KNN model takes the right inputs and outputs a dictionary with all possible classes.
        """
        pred, output = self.KNN("BREST")
        self.assertIsInstance(output, dict)
        self.assertIn(pred, self.classes)
        for label in self.classes:
            self.assertIn(label, output.keys())

    def test_knn_output_probabilities(self):
        """
        Test that the KNN model returns probabilities for each possible class.
        """
        _, output = self.KNN("RADE DE BREST")
        # sums up to one
        self.assertLess(abs(sum(output.values()) - 1), 1e-3)
        # all values between 0 and 1
        for value in output.values():
            self.assertGreaterEqual(value, 0)
            self.assertLessEqual(value, 1)

    def test_knn_case_insensitive(self):
        pred_upper, output_upper = self.KNN("BREST")
        pred_lower, output_lower = self.KNN("brest")

        self.assertEqual(pred_upper, pred_lower)
        self.assertListEqual(list(output_upper.items()),
                             list(output_lower.items()))

    def test_naive_bayes_io(self):
        """
        Test that the Naive Bayes model takes the right inputs and outputs a dictionary with all possible classes.
        """
        pred, output = self.NaiveBayes("BREST")
        self.assertIn(pred, self.classes)
        self.assertIsInstance(output, dict)

    # def test_naive_bayes_output_probabilities(self):
    #     _, output = self.NaiveBayes("BREST")
    #     self.assertLess(abs(sum(output.values()) - 1), 1e-3)
    #     for label in self.classes:
    #         self.assertIn(label, output.keys())

    def test_linear_io(self):
        """
        Test that the Linear model takes the right inputs and outputs a dictionary with all possible classes.
        """
        pred, output = self.Linear("BREST")
        self.assertIn(pred, self.classes)
        self.assertIsInstance(output, dict)

    def test_linear_output_probabilities(self):
        _, output = self.Linear("BREST")
        self.assertLess(abs(sum(output.values()) - 1), 1e-3)
        for label in self.classes:
            self.assertIn(label, output.keys())

    def test_w2v_io(self):
        """
        Test that the Word2Vec model takes the right inputs and outputs a dictionary with all possible classes.
        """
        pred, output = self.W2V("BREST")
        self.assertIn(pred, self.classes)
        self.assertIsInstance(output, dict)

    def test_w2v_output_probabilities(self):
        _, output = self.W2V("BREST")
        self.assertLess(abs(sum(output.values()) - 1), 1e-3)
        for label in self.classes:
            self.assertIn(label, output.keys())
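
The probability checks for the Linear and Word2Vec models (and the commented-out Naive Bayes one) repeat the same assertions. One possible consolidation is a shared helper like the sketch below, together with the usual entry point for running the module directly:

def assert_valid_distribution(case, output, classes, tol=1e-3):
    # Shared assertions: probabilities cover every class, lie in [0, 1],
    # and sum to one within tolerance.
    case.assertLess(abs(sum(output.values()) - 1), tol)
    for label in classes:
        case.assertIn(label, output)
    for value in output.values():
        case.assertGreaterEqual(value, 0)
        case.assertLessEqual(value, 1)

if __name__ == '__main__':
    # Assumes the module already imports unittest (ModelTests subclasses unittest.TestCase).
    unittest.main()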