def test_update_stats(self):
        """Training twice should accumulate the instance count, and a missing
        feature column must raise WrongNumberOfFeatures.
        """
        model = SparseGaussianProcesses()

        X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.85, normalize=True)

        model.train(X_train_set, y_train_set, stats=stats)

        n_first = model.stats['n_instances_trained']
        first_stats = model.stats['dataset_stats']

        assert n_first == X_train_set.shape[0]
        assert first_stats == stats

        X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model.train(X_train_set, y_train_set, stats)

        # Second pass adds on top of the first training run's count.
        assert model.stats['n_instances_trained'] == n_first + X_train_set.shape[0]
        assert len(model.stats['dataset_stats'].keys()) == len(stats.keys()) == len(first_stats.keys())

        incomplete = X_train_set.drop(axis=1, columns='Temperature', inplace=False, errors='ignore')

        with pytest.raises(WrongNumberOfFeatures):
            model.train(incomplete, y_train_set, stats)
    def test_update_stats(self):
        """Re-training the CNN should accumulate the instance count and keep
        dataset stats key-compatible; a missing feature column must raise
        WrongNumberOfFeatures.
        """
        seq_length = 24
        updated_cnn = ConvolutionalNeuralNetwork(seq_length)

        X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                          'Pollutant',
                                                                                          'Uncertainty',
                                                                                          size=0.85,
                                                                                          normalize=True)

        updated_cnn.train(X_train_set, y_train_set, stats)

        instances = updated_cnn.stats['n_instances_trained']
        dataset_stats = updated_cnn.stats['dataset_stats']

        assert X_train_set.shape[0] == instances
        assert stats == dataset_stats

        X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(dataset,
                                                                                          'Pollutant',
                                                                                          'Uncertainty',
                                                                                          size=0.5,
                                                                                          normalize=True)

        updated_cnn.train(X_train_set, y_train_set, stats)

        assert X_train_set.shape[0] + instances == updated_cnn.stats['n_instances_trained']
        # Fix: this previously inspected the unrelated module-level `cnn`
        # model; the assertion must check the model trained in this test.
        assert len(updated_cnn.stats['dataset_stats'].keys()) == len(stats.keys()) == len(dataset_stats.keys())

        missing_data = X_train_set.drop(axis=1, columns='Temperature', inplace=False, errors='ignore')

        with pytest.raises(WrongNumberOfFeatures):
            updated_cnn.train(missing_data, y_train_set, stats)
    def test_retrain(self):
        """A second training pass adds to the instance count and replaces the
        stored dataset statistics.
        """
        X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = SparseGaussianProcesses()
        model.train(X_train_set, y_train_set, stats=stats)

        n_before = model.stats['n_instances_trained']
        stats_before = model.stats['dataset_stats']

        assert n_before == X_train_set.shape[0]
        assert stats_before == stats

        X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=1, normalize=True)

        model.train(X_train_set, y_train_set, stats)

        assert model.stats['n_instances_trained'] == n_before + X_train_set.shape[0]
        # Stats must have been replaced: different from both snapshots.
        assert stats_before != model.stats['dataset_stats'] != stats
 def test_get_training_and_test_set(self, df, portion, expected):
     """Split sizes must match `expected`, and features must align with labels."""
     X_train, y_train, X_test, y_test, _ = MainTransformer.get_training_and_test_set(
         df, 'Pollutant', None, size=portion, normalize=False)
     assert X_train.shape[0] == expected[0]
     assert X_test.shape[0] == expected[1]
     assert X_train.shape[0] == y_train.shape[0]
     assert X_test.shape[0] == y_test.shape[0]
    def test_train_not_enough_instances(self):
        """Training on fewer rows than one CNN sequence must raise."""
        model = ConvolutionalNeuralNetwork(24)

        X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.01, normalize=True)

        with pytest.raises(NotEnoughInstancesError):
            model.train(X_train_set, y_train_set, stats=stats)
    def test_save_wrong(self, config):
        """Saving with a bad config reports failure plus an error message."""
        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        result, msg = model.save_model(config)

        assert not result
        assert isinstance(msg, str)
    def test_train_and_test(self):
        """Predicting on the training set yields one prediction per row."""
        model = ConvolutionalNeuralNetwork(24)

        # Overfitting is irrelevant here; we only check prediction counts.
        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=1, normalize=True)

        model.train(X_train, y_train, stats=stats)
        predictions = model.predict(X_train)

        assert len(predictions) == X_train.shape[0]
    def test_eval(self, error_func):
        """eval() must produce exactly one prediction per test instance."""
        X_train, y_train, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)
        _, predictions, _ = model.eval(X_test, y_test, error_func=error_func)

        assert len(predictions) == len(X_test)
    def test_save(self, config):
        """A valid config saves successfully; the trained model is kept in a
        module global for later tests that reload it.
        """
        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        result, msg = model.save_model(config)

        global sparse_gp
        sparse_gp = model

        assert result
        assert msg is None
    def test_train_and_test_various_datasets(self, given_dataset):
        """GP bookkeeping and prediction count for each parametrized dataset."""
        X_train, y_train, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            given_dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = GaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        assert model.stats['n_instances_trained'] == X_train.shape[0]
        assert model.stats['dataset_stats'] == stats

        predictions = model.predict(X_test, uncertainty=True)

        assert len(predictions) == X_test.shape[0]
    def test_train_and_test_no_uncertainty_not_enough_instances(self):
        """Too few test rows for a full sequence: every prediction is (None, None)."""
        model = ConvolutionalNeuralNetwork(24)

        # Overfitting is irrelevant here; we only inspect prediction contents.
        X_train, y_train, X_test, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.95, normalize=True)

        model.train(X_train, y_train, stats=stats)
        predictions = model.predict(X_test, uncertainty=False)

        none_pairs = [p for p in predictions if p[0] is None and p[1] is None]

        assert len(none_pairs) == len(X_test)
    def test_train_and_test(self, uncertainty):
        """End-to-end: build the dataset, train a sparse GP, check predictions.

        The built dataset is published via the module-level `dataset` global so
        subsequent tests can reuse it.
        """
        global dataset

        data_transformer = MainTransformer(config=ConfigReader.CONFIG)
        data_transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
        data_transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
        data_transformer.transform()
        dataset = data_transformer.get_dataset()

        complete_dataset = dataset.dropna(inplace=False)
        MainTransformer.periodic_f(complete_dataset)

        X_train, y_train, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.8, normalize=True)

        print(np.array(X_train))
        print(y_train)

        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        assert model.stats['n_instances_trained'] == X_train.shape[0]
        assert model.stats['dataset_stats'] == stats

        predictions = model.predict(X_test, uncertainty=uncertainty)

        assert len(predictions) == X_test.shape[0]

        if uncertainty:
            # Every prediction must be a (value, uncertainty) pair.
            assert not [p for p in predictions if len(p) != 2]

        if not isinstance(uncertainty, bool):
            # A non-boolean flag yields plain (non-tuple) predictions.
            assert len([p for p in predictions if not isinstance(p, tuple)]) == X_test.shape[0]
    def test_predict_not_enough_instances(self):
        """Too few test rows for one sequence: all predictions are (None, None).

        The trained CNN is published via the module-level `cnn` global for
        later tests.
        """
        global cnn
        cnn = ConvolutionalNeuralNetwork(24)

        X_train, y_train, X_test, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.95, normalize=True)

        cnn.train(X_train, y_train, stats=stats)

        predictions = cnn.predict(X_test=X_test, uncertainty=True)

        none_pairs = [p for p in predictions if p[0] is None and p[1] is None]

        assert len(none_pairs) == X_test.shape[0]
    def test_train(self):
        """Training must build the network and record feature count and stats."""
        seq_length = 24
        model = ConvolutionalNeuralNetwork(seq_length)

        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.85, normalize=True)

        # Pre-training invariants.
        assert not model.is_built
        assert model.n_features is None
        assert model.seq_length == seq_length
        assert model.stats['dataset_stats'] != stats

        model.train(X_train, y_train, stats)

        # Post-training invariants.
        assert model.is_built
        assert model.n_features == X_train.shape[1]
        assert model.seq_length == seq_length
        assert model.stats['dataset_stats'] == stats
    def test_eval_not_enough(self):
        """eval() on a tiny time window still yields one prediction per test row."""
        model = ConvolutionalNeuralNetwork(24)

        small_dataset, _ = DB.DBManager.get_dataset(
            datetime_from=datetime.strptime("01-01-2018 01:00", '%d-%m-%Y %H:%M'),
            datetime_to=datetime.strptime("03-01-2018 06:00", '%d-%m-%Y %H:%M'),
            config=ConfigReader.CONFIG)

        # Overfitting is irrelevant here; only the prediction count matters.
        X_train, y_train, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            small_dataset, 'Pollutant', 'Uncertainty', size=0.8, normalize=True)

        model.train(X_train, y_train, stats=stats)
        _, predictions, _ = model.eval(X_test, y_test)

        assert len(predictions) == len(X_test)
    def test_train_and_test(self, uncertainty):
        """Plain GP: stats bookkeeping and prediction shape per uncertainty mode."""
        X_train, y_train, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            dataset_gp, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = GaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        assert model.stats['n_instances_trained'] == X_train.shape[0]
        assert model.stats['dataset_stats'] == stats

        predictions = model.predict(X_test, uncertainty=uncertainty)

        assert len(predictions) == X_test.shape[0]

        if uncertainty:
            # Every prediction must be a (value, uncertainty) pair.
            assert not [p for p in predictions if len(p) != 2]

        if not isinstance(uncertainty, bool):
            # A non-boolean flag yields plain (non-tuple) predictions.
            assert len([p for p in predictions if not isinstance(p, tuple)]) == X_test.shape[0]