def test_normalize_unnormalize_without_dataset(self, stats):
        """Both normalize and unnormalize return None when no dataset is given."""
        normalized, _ = MainTransformer.normalize(None, {}, inplace=False)
        unnormalized = MainTransformer.unnormalize(None, {}, inplace=False)

        assert normalized is None
        assert unnormalized is None
    def test_retrain(self):
        """Retraining accumulates the instance count and refreshes dataset stats."""
        X_train, y_train, _, _, first_stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        gp = SparseGaussianProcesses()
        gp.train(X_train, y_train, stats=first_stats)

        trained_before = gp.stats['n_instances_trained']
        stats_before = gp.stats['dataset_stats']

        assert trained_before == X_train.shape[0]
        assert stats_before == first_stats

        X_train, y_train, _, _, second_stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=1, normalize=True)

        gp.train(X_train, y_train, second_stats)

        # the instance count grows by the new batch; the stored stats change
        assert gp.stats['n_instances_trained'] == trained_before + X_train.shape[0]
        assert stats_before != gp.stats['dataset_stats'] != second_stats
    def test_update_stats(self):
        """Retraining keeps instance counts and stats keys consistent; a missing
        feature column must be rejected."""
        model = SparseGaussianProcesses()

        X_train, y_train, _, _, first_stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.85, normalize=True)

        model.train(X_train, y_train, stats=first_stats)

        trained_before = model.stats['n_instances_trained']
        stats_before = model.stats['dataset_stats']

        assert trained_before == X_train.shape[0]
        assert stats_before == first_stats

        X_train, y_train, _, _, second_stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model.train(X_train, y_train, second_stats)

        assert model.stats['n_instances_trained'] == trained_before + X_train.shape[0]
        assert len(model.stats['dataset_stats'].keys()) == len(second_stats.keys()) == len(stats_before.keys())

        # dropping a feature column must make retraining fail
        incomplete = X_train.drop(axis=1, columns='Temperature', inplace=False, errors='ignore')

        with pytest.raises(WrongNumberOfFeatures):
            model.train(incomplete, y_train, second_stats)
    def test_update_stats(self):
        """Retraining a CNN keeps instance counts and stats keys consistent;
        a missing feature column must be rejected.

        Fix: the stats-keys assertion previously read the module-level `cnn`
        (a different model, set by another test) instead of the locally
        trained `updated_cnn`.
        """
        seq_length = 24
        updated_cnn = ConvolutionalNeuralNetwork(seq_length)

        X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.85, normalize=True)

        updated_cnn.train(X_train_set, y_train_set, stats)

        instances = updated_cnn.stats['n_instances_trained']
        dataset_stats = updated_cnn.stats['dataset_stats']

        assert X_train_set.shape[0] == instances
        assert stats == dataset_stats

        X_train_set, y_train_set, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        updated_cnn.train(X_train_set, y_train_set, stats)

        assert X_train_set.shape[0] + instances == updated_cnn.stats['n_instances_trained']
        # BUG FIX: was `cnn.stats[...]` — the model under test is `updated_cnn`
        assert len(updated_cnn.stats['dataset_stats'].keys()) == len(stats.keys()) == len(dataset_stats.keys())

        # dropping a feature column must make retraining fail
        missing_data = X_train_set.drop(axis=1, columns='Temperature', inplace=False, errors='ignore')

        with pytest.raises(WrongNumberOfFeatures):
            updated_cnn.train(missing_data, y_train_set, stats)
 def test_config(self, given_config):
     """MainTransformer must raise for a None config and accept any other.

     Fix: removed a leftover debug print and `assert True` statements that
     added nothing (the one inside the `raises` block was unreachable
     whenever the constructor raised as expected).
     """
     if given_config is None:
         # constructing with config=None must raise
         with pytest.raises(WrongConfigTypeException):
             MainTransformer(config=given_config)
     else:
         # any non-None config is accepted without raising
         MainTransformer(config=given_config)
    def test_get_transformers(self, weather_transformer,
                              pollutant_transformer):
        """add_transformer's return value must mirror actual registration."""
        main_transformer = MainTransformer(config=config)
        result_weather = main_transformer.add_transformer(weather_transformer)
        result_pollutant = main_transformer.add_transformer(
            pollutant_transformer)

        # success flag and presence in the registry must agree (no XOR mismatch)
        assert not (result_weather ^ ('weather' in main_transformer.transformers))
        assert not (result_pollutant ^ ('pollutant' in main_transformer.transformers))
    def test_add_transformer(self, transformer):
        """A transformer registers at most once; re-adding it must fail."""
        main_transformer = MainTransformer(config=config)
        first_result = main_transformer.add_transformer(transformer)

        if not isinstance(transformer, Transformers):
            # unknown transformer types are rejected
            assert not first_result

        registered = main_transformer.transformers
        if 'weather' in registered or 'pollutant' in registered:
            # something got registered, so the call must have reported success
            assert first_result

        # adding the same transformer a second time is rejected
        assert not main_transformer.add_transformer(transformer)
    def test_normalize_and_unnormalize(self, stats):
        """normalize z-scores each column and reports the moments used;
        unnormalize inverts the transformation exactly.

        Expected values are computed manually (numpy moments, or the
        supplied `stats`) and compared against MainTransformer's output.
        Fix: removed leftover debug `print(...)` calls before the final
        round-trip assertion.
        """
        copied_dataset = pandas.DataFrame({
            'TestColumn': [1.0, 2.0, 3.0],
            'TestColumn2': [5.0, 6.0, 7.0]
        })

        unnormalized_dataset = copied_dataset.copy()

        normalized, statistics = MainTransformer.normalize(copied_dataset,
                                                           inplace=False,
                                                           stats=stats)

        if stats is None:
            # no stats given: moments must come from the data itself
            mean_one = np.mean(copied_dataset['TestColumn'])
            std_one = np.std(copied_dataset['TestColumn'])

            mean_two = np.mean(copied_dataset['TestColumn2'])
            std_two = np.std(copied_dataset['TestColumn2'])
        else:
            # stats given: normalization must use the supplied moments
            mean_one = stats['TestColumn']['mean']
            std_one = stats['TestColumn']['std']

            mean_two = stats['TestColumn2']['mean']
            std_two = stats['TestColumn2']['std']

        # manual z-score for comparison
        copied_dataset['TestColumn'] -= mean_one
        copied_dataset['TestColumn2'] -= mean_two

        copied_dataset['TestColumn'] /= std_one
        copied_dataset['TestColumn2'] /= std_two

        manual_stats = {
            'TestColumn': {
                'mean': mean_one,
                'std': std_one
            },
            'TestColumn2': {
                'mean': mean_two,
                'std': std_two
            }
        }

        assert copied_dataset.equals(normalized)
        assert statistics == manual_stats

        # round-trip: unnormalizing must restore the original values
        MainTransformer.unnormalize(normalized, statistics, inplace=True)

        assert unnormalized_dataset.equals(normalized)
 def test_get_training_and_test_set(self, df, portion, expected):
     """Split sizes match the requested portion, and X/y lengths agree."""
     X_train, y_train, X_test, y_test, _ = MainTransformer.get_training_and_test_set(
         df, 'Pollutant', None, size=portion, normalize=False)

     assert (X_train.shape[0], X_test.shape[0]) == (expected[0], expected[1])
     assert X_train.shape[0] == y_train.shape[0]
     assert X_test.shape[0] == y_test.shape[0]
    def test_train_not_enough_instances(self):
        """Training on a tiny split (size=0.01) must raise NotEnoughInstancesError."""
        model = ConvolutionalNeuralNetwork(24)

        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.01, normalize=True)

        with pytest.raises(NotEnoughInstancesError):
            model.train(X_train, y_train, stats=stats)
    def test_save_wrong(self, config):
        """save_model with a bad config reports failure plus an error message."""
        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        success, message = model.save_model(config)

        assert not success
        assert isinstance(message, str)
    def test_periodic_f_add_and_remove(self):
        """periodic_f adds the four periodic feature columns and
        remove_periodic_f restores the original column set exactly.

        Fix: replaced the convoluted for/else chains — which contained a
        tautological `assert is_successful` right after setting it True and
        `return` statements unreachable after a failing assert — with
        equivalent any()/all() assertions. Also uses `keys()` consistently
        instead of mixing in `set(df.loc[:])` (both iterate column labels).
        """
        periodic_features = ['TimeSin', 'TimeCos', 'DateSin', 'DateCos']

        copied_dataset = db_dataset.loc[:]
        initial_keys = set(copied_dataset.keys())

        # precondition: none of the periodic features may already exist
        assert not any(f in initial_keys for f in periodic_features)

        MainTransformer.periodic_f(copied_dataset)
        keys = set(copied_dataset.keys())

        # all periodic features must have been added
        assert all(f in keys for f in periodic_features)

        MainTransformer.remove_periodic_f(copied_dataset)

        keys = set(copied_dataset.keys())
        # removal must leave no periodic feature behind and restore the
        # original column set
        assert not any(f in keys for f in periodic_features)
        assert initial_keys == keys
    def test_train_and_test(self):
        """Predicting on the training set yields one prediction per instance."""
        model = ConvolutionalNeuralNetwork(24)

        # just testing, don't care about overfitting
        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=1, normalize=True)

        model.train(X_train, y_train, stats=stats)
        predictions = model.predict(X_train)

        assert len(predictions) == X_train.shape[0]
    def test_unnormalize_without_stats(self, stats):
        """unnormalize yields None when stats is None and a frame otherwise
        (the parametrized non-None case here is an empty stats dict)."""
        frame = pandas.DataFrame({
            'TestColumn': [1.0, 2.0, 3.0],
            'TestColumn2': [5.0, 6.0, 7.0]
        })

        result = MainTransformer.unnormalize(frame, stats, inplace=False)

        if stats is None:
            assert result is None
        else:
            assert result is not None
            assert stats == {}
    def test_eval(self, error_func):
        """eval returns one prediction per test instance for any error function."""
        X_train, y_train, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)
        _, predictions, _ = model.eval(X_test, y_test, error_func=error_func)

        assert len(predictions) == len(X_test)
    def test_transform(self):
        """The combined dataset has one column per configured weather and
        pollutant field."""
        main_transformer = MainTransformer(config=config)
        main_transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
        main_transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)

        expected_columns = len(config['weather'].keys()) + len(config['pollutant'].keys())

        main_transformer.transform()
        transformed = main_transformer.get_dataset()

        assert expected_columns == transformed.shape[1]
    def test_save(self, config):
        """save_model with a valid config succeeds with no error message."""
        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = SparseGaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        success, message = model.save_model(config)

        # keep the trained model available at module level for later tests
        global sparse_gp
        sparse_gp = model

        assert success
        assert message is None
    def test_train_and_test_various_datasets(self, given_dataset):
        """Training records stats and predict covers every test instance."""
        X_train, y_train, X_test, _, stats = MainTransformer.get_training_and_test_set(
            given_dataset, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = GaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        assert model.stats['n_instances_trained'] == X_train.shape[0]
        assert model.stats['dataset_stats'] == stats

        predictions = model.predict(X_test, uncertainty=True)

        assert len(predictions) == X_test.shape[0]
    def test_train_and_test_no_uncertainty_not_enough_instances(self):
        """On a too-small test split, every prediction is a (None, None) pair."""
        model = ConvolutionalNeuralNetwork(24)

        # just testing, don't care about overfitting
        X_train, y_train, X_test, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.95, normalize=True)

        model.train(X_train, y_train, stats=stats)
        predictions = model.predict(X_test, uncertainty=False)

        none_pairs = [p for p in predictions if p[0] is None and p[1] is None]

        assert len(none_pairs) == len(X_test)
    def test_predict_not_enough_instances(self):
        """With a too-small test split, every prediction degrades to (None, None)."""
        # keep the trained model available at module level for later tests
        global cnn
        cnn = ConvolutionalNeuralNetwork(24)

        X_train, y_train, X_test, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.95, normalize=True)

        cnn.train(X_train, y_train, stats=stats)

        predictions = cnn.predict(X_test=X_test, uncertainty=True)

        none_pairs = [p for p in predictions if p[0] is None and p[1] is None]

        assert X_test.shape[0] == len(none_pairs)
    def test_train(self):
        """train builds the network and records feature count and dataset stats."""
        seq_length = 24
        model = ConvolutionalNeuralNetwork(seq_length)

        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.85, normalize=True)

        # pristine model: nothing built, no feature count, stats not recorded
        assert not model.is_built
        assert model.n_features is None
        assert model.seq_length == seq_length
        assert model.stats['dataset_stats'] != stats

        model.train(X_train, y_train, stats)

        # trained model: network built, feature count and stats recorded
        assert model.is_built
        assert model.n_features == X_train.shape[1]
        assert model.seq_length == seq_length
        assert model.stats['dataset_stats'] == stats
    def test_eval_not_enough(self):
        """eval still yields one prediction per test instance on a small
        two-day dataset fetched from the database."""
        model = ConvolutionalNeuralNetwork(24)

        fmt = '%d-%m-%Y %H:%M'
        small_dataset, _ = DB.DBManager.get_dataset(
            datetime_from=datetime.strptime("01-01-2018 01:00", fmt),
            datetime_to=datetime.strptime("03-01-2018 06:00", fmt),
            config=ConfigReader.CONFIG)

        # just testing, don't care about overfitting
        X_train, y_train, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            small_dataset, 'Pollutant', 'Uncertainty', size=0.8, normalize=True)

        model.train(X_train, y_train, stats=stats)
        _, predictions, _ = model.eval(X_test, y_test)

        assert len(predictions) == len(X_test)
    def test_train_and_test(self, uncertainty):
        """predict honours the uncertainty flag for every parametrized value."""
        X_train, y_train, X_test, _, stats = MainTransformer.get_training_and_test_set(
            dataset_gp, 'Pollutant', 'Uncertainty', size=0.5, normalize=True)

        model = GaussianProcesses()
        model.train(X_train, y_train, stats=stats)

        assert model.stats['n_instances_trained'] == X_train.shape[0]
        assert model.stats['dataset_stats'] == stats

        predictions = model.predict(X_test, uncertainty=uncertainty)

        assert len(predictions) == X_test.shape[0]

        if uncertainty:
            # every prediction must be a (value, uncertainty) pair
            assert all(len(p) == 2 for p in predictions)

        if not isinstance(uncertainty, bool):
            # non-boolean flags: no prediction comes back as a tuple
            non_tuples = [p for p in predictions if not isinstance(p, tuple)]
            assert len(non_tuples) == X_test.shape[0]
 def test_remove_features(self, df, removed_elements, left_elements):
     """Removing the given features leaves exactly the expected columns."""
     remaining = MainTransformer.remove_features(df, removed_elements)
     assert set(remaining.keys()) == left_elements
 def test_init_transformers(self):
     """Smoke test: registering both transformers and running transform()
     must complete without raising."""
     transformer = MainTransformer(config=ConfigReader.CONFIG)
     transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
     transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
     transformer.transform()
    def test_train_and_test(self, uncertainty):
        """End-to-end: build the dataset via the transformers, train a sparse
        GP on it, and check predict honours the uncertainty flag.

        Fix: removed leftover debug prints and a large commented-out
        duplicate of the split call.
        """
        global dataset

        data_transformer = MainTransformer(config=ConfigReader.CONFIG)
        data_transformer.add_transformer(Transformers.WEATHER_TRANSFORMER)
        data_transformer.add_transformer(Transformers.POLLUTANT_TRANSFORMER)
        data_transformer.transform()
        dataset = data_transformer.get_dataset()

        # NOTE(review): complete_dataset is built (dropna + periodic features)
        # but the split below still uses the raw `dataset` — confirm which
        # one is intended; the removed commented-out code suggested
        # complete_dataset was once used here.
        complete_dataset = dataset.dropna(inplace=False)
        MainTransformer.periodic_f(complete_dataset)

        X_train_set, y_train_set, X_test, y_test, stats = MainTransformer.get_training_and_test_set(
            dataset, 'Pollutant', 'Uncertainty', size=0.8, normalize=True)

        gp = SparseGaussianProcesses()
        gp.train(X_train_set, y_train_set, stats=stats)

        assert gp.stats['n_instances_trained'] == X_train_set.shape[0]
        assert gp.stats['dataset_stats'] == stats

        predictions = gp.predict(X_test, uncertainty=uncertainty)

        assert len(predictions) == X_test.shape[0]

        if uncertainty:
            # every prediction must be a (value, uncertainty) pair
            values_without_uncertainty = list(filter(lambda x: len(x) != 2, predictions))
            assert len(values_without_uncertainty) == 0

        if not isinstance(uncertainty, bool):
            # non-boolean flags: no prediction comes back as a tuple
            assert len(list(filter(lambda x: not isinstance(x, tuple), predictions))) == X_test.shape[0]