def test_save_wrong(self, config):
        """Train a model on half of the dataset and check that saving with the given config is rejected."""
        train_X, train_y, _, _, dataset_stats = MainTransformer.get_training_and_test_set(
            dataset,
            'Pollutant',
            'Uncertainty',
            size=0.5,
            normalize=True,
        )

        model = GaussianProcesses()
        model.train(train_X, train_y, stats=dataset_stats)

        saved, message = model.save_model(config)

        # A rejected save is reported as a falsy flag together with a string message.
        assert not saved
        assert isinstance(message, str)
    def test_update_stats(self):
        """
        Train the same model twice and check that its stats are updated, then verify that
        training with a dataset lacking the 'Temperature' column raises WrongNumberOfFeatures.
        """
        gp = GaussianProcesses()

        # First round of training.
        first_X, first_y, _, _, first_stats = MainTransformer.get_training_and_test_set(
            dataset,
            'Pollutant',
            'Uncertainty',
            size=0.85,
            normalize=True,
        )
        gp.train(first_X, first_y, stats=first_stats)

        trained_so_far = gp.stats['n_instances_trained']
        stats_after_first = gp.stats['dataset_stats']

        assert first_X.shape[0] == trained_so_far
        assert first_stats == stats_after_first

        # Second round on a different split: the instance counter must accumulate.
        second_X, second_y, _, _, second_stats = MainTransformer.get_training_and_test_set(
            dataset,
            'Pollutant',
            'Uncertainty',
            size=0.5,
            normalize=True,
        )
        gp.train(second_X, second_y, second_stats)

        assert second_X.shape[0] + trained_so_far == gp.stats['n_instances_trained']

        # The number of stats keys must be the same before and after retraining.
        n_model_keys = len(gp.stats['dataset_stats'].keys())
        n_given_keys = len(second_stats.keys())
        n_previous_keys = len(stats_after_first.keys())
        assert n_model_keys == n_given_keys == n_previous_keys

        # Remove one feature column (a copy is returned; the original frame is untouched).
        reduced_X = second_X.drop(axis=1, columns='Temperature', inplace=False, errors='ignore')

        with pytest.raises(WrongNumberOfFeatures):
            gp.train(reduced_X, second_y, second_stats)
    def test_eval(self, error_func):
        """Evaluate a freshly trained model and check that it yields one prediction per test row."""
        train_X, train_y, test_X, test_y, dataset_stats = MainTransformer.get_training_and_test_set(
            dataset,
            'Pollutant',
            'Uncertainty',
            size=0.5,
            normalize=True,
        )

        model = GaussianProcesses()
        model.train(train_X, train_y, stats=dataset_stats)

        # eval returns three values; only the predictions are inspected here.
        _, predicted, _ = model.eval(test_X, test_y, error_func=error_func)

        assert len(predicted) == len(test_X)
    def test_load_saved_model(self):
        """Load the model named 'gp_full' and check that it matches the module-level ``full_gp``."""
        # Only read here; presumably populated by test_save running earlier - verify test ordering.
        global full_gp

        # Shallow copy, so adding the 'loadedModel' key does not touch ConfigReader.CONFIG itself.
        load_config = copy.copy(ConfigReader.CONFIG)
        load_config['loadedModel'] = {'modelName': 'gp_full'}

        restored = GaussianProcesses()
        loaded_ok, message = restored.load_model(load_config)

        assert loaded_ok
        assert message is None

        # The restored model must carry the same stats, kernel and parameters.
        assert full_gp.stats == restored.stats
        assert full_gp.kernel.to_dict() == restored.kernel.to_dict()
        assert full_gp.model.param_array.tolist() == restored.model.param_array.tolist()
    def test_save(self, config):
        """Train a model, save it with the given config and publish it as the module-level ``full_gp``."""
        global full_gp

        train_X, train_y, _, _, dataset_stats = MainTransformer.get_training_and_test_set(
            dataset,
            'Pollutant',
            'Uncertainty',
            size=0.5,
            normalize=True,
        )

        model = GaussianProcesses()
        model.train(train_X, train_y, stats=dataset_stats)

        saved, message = model.save_model(config)

        # Publish the model before asserting, so the global is set even if the save check fails.
        full_gp = model

        assert saved
        assert message is None
    def test_train_and_test_various_datasets(self, given_dataset):
        """Train on the given dataset, check the recorded stats and the number of predictions."""
        train_X, train_y, test_X, _, dataset_stats = MainTransformer.get_training_and_test_set(
            given_dataset,
            'Pollutant',
            'Uncertainty',
            size=0.5,
            normalize=True,
        )

        model = GaussianProcesses()
        model.train(train_X, train_y, stats=dataset_stats)

        # The model must record how many rows it saw and the stats it was given.
        assert model.stats['n_instances_trained'] == train_X.shape[0]
        assert model.stats['dataset_stats'] == dataset_stats

        predicted = model.predict(test_X, uncertainty=True)

        assert len(predicted) == test_X.shape[0]
Example #7
0
    def get_model_by_name(name):
        """
        Get a model from database and reproduce it given the parameters saved
        :param name: str - name of the model
        :return: tuple (model, model_record, error):
        * (None, None, str) - no record was found, str is the error reported by DBManager
        * (model, model_record, err) - record found and its type is supported; model and err are exactly what
        the model class' new_from_json returned, so err carries the reconstruction error if there was one
        * (None, model_record, err) - record found but its type is not one of CNN, FullGP, SparseGP; err is
        whatever DBManager returned together with the record
        model_record is the object returned by DBManager.get_model_by_name (its type, model_params and
        extra_params attributes are read here)
        """
        model_record, err = DBManager.get_model_by_name(name)
        if model_record is None:
            return None, None, err

        # Pick the class able to rebuild this kind of model.
        if model_record.type == 'CNN':
            model_class = ConvolutionalNeuralNetwork
        elif model_record.type == 'FullGP':
            model_class = GaussianProcesses
        elif model_record.type == 'SparseGP':
            model_class = SparseGaussianProcesses
        else:
            # NOTE(review): err comes from the DB lookup that succeeded, so it is probably None here and
            # the caller gets no message about the unsupported type - confirm and add one if desired.
            return None, model_record, err

        model, err = model_class.new_from_json(model_record.model_params, model_record.extra_params)

        # Propagate the reconstruction error instead of always reporting None, so that a failed
        # reconstruction is not mistaken for success by the caller.
        return model, model_record, err
    def test_retrain(self, dataset_one, dataset_two):
        """Train on one dataset, retrain on another and check that counters and dataset stats change."""
        first_X, first_y, _, _, first_stats = MainTransformer.get_training_and_test_set(
            dataset_one,
            'Pollutant',
            'Uncertainty',
            size=0.6,
            normalize=True,
        )

        model = GaussianProcesses()
        model.train(first_X, first_y, stats=first_stats)

        trained_so_far = model.stats['n_instances_trained']
        stats_after_first = model.stats['dataset_stats']

        assert trained_so_far == first_X.shape[0]
        assert stats_after_first == first_stats

        second_X, second_y, _, _, second_stats = MainTransformer.get_training_and_test_set(
            dataset_two,
            'Pollutant',
            'Uncertainty',
            size=0.5,
            normalize=True,
        )

        model.train(second_X, second_y, second_stats)

        # The instance counter accumulates across both training rounds.
        assert trained_so_far + second_X.shape[0] == model.stats['n_instances_trained']

        # After retraining, the model's stats equal neither the first round's stats
        # nor the stats of the second dataset alone.
        stats_after_second = model.stats['dataset_stats']
        assert stats_after_first != stats_after_second
        assert stats_after_second != second_stats
    def test_train_and_test(self, uncertainty):
        """Train a model and check the shape of its predictions for the given ``uncertainty`` argument."""
        train_X, train_y, test_X, _, dataset_stats = MainTransformer.get_training_and_test_set(
            dataset_gp,
            'Pollutant',
            'Uncertainty',
            size=0.5,
            normalize=True,
        )

        model = GaussianProcesses()
        model.train(train_X, train_y, stats=dataset_stats)

        assert model.stats['n_instances_trained'] == train_X.shape[0]
        assert model.stats['dataset_stats'] == dataset_stats

        predicted = model.predict(test_X, uncertainty=uncertainty)

        assert len(predicted) == test_X.shape[0]

        if uncertainty:
            # With a truthy flag every prediction must have exactly two entries.
            wrong_length = [item for item in predicted if len(item) != 2]
            assert len(wrong_length) == 0

        if not isinstance(uncertainty, bool):
            # With a non-boolean flag no prediction may be a tuple.
            non_tuples = [item for item in predicted if not isinstance(item, tuple)]
            assert len(non_tuples) == test_X.shape[0]
    def test_model_to_json_load_from_json(self):
        """Serialise the module-level ``full_gp`` to JSON and rebuild an equivalent model from it."""
        # Only read here; presumably populated by test_save running earlier - verify test ordering.
        global full_gp

        serialised_model, serialised_extra = full_gp.model_to_json()
        model_dict = json.loads(serialised_model)
        extra_dict = json.loads(serialised_extra)

        # The serialised payload must describe the current kernel and parameters.
        payload = model_dict['data']
        assert payload['kernel'] == full_gp.kernel.to_dict()
        assert payload['params'] == full_gp.model.param_array.tolist()

        rebuilt, message = GaussianProcesses.new_from_json(model_dict, extra_dict)

        assert message is None
        assert full_gp.stats == rebuilt.stats
        assert full_gp.kernel.to_dict() == rebuilt.kernel.to_dict()
        assert full_gp.model.param_array.tolist() == rebuilt.model.param_array.tolist()
Example #11
0
    def create_model(name, body):
        """
        Function for creating a non-existing model and training it with a given dataset
        This function should happen in the background to prevent overhead to Flask
        :param name: unique name of the model
        :param body: dict with following data:
        * type - type of model (CNN, FullGP, SparseGP)
        * range - dict with start and end fields, each storing datetime in DATE_TIME_FORMAT
        * locations - list of lists, nested list should have two entries 0 - longitude, 1 - latitude
        * pollutant - name of the pollutant PM10, PM2.5
        * data - dict object with additional data that would be stored as JSONB data, it could have keys such as
        weather
        :return: (bool, str | None) - (True, None) when the model was trained and handed to DBManager.upsert_model,
        otherwise (False, error message) for a missing body, missing or unknown model type, or missing dataset
        """

        if body is None:
            return False, Errors.MISSING_BODY.value

        # Validate the requested model type up front, so that a bad request is rejected
        # before the dataset is fetched and transformed.
        if 'type' not in body:
            return False, Errors.NO_MODEL_TYPE_GIVEN.value

        model_type = body['type']
        if model_type == 'CNN':
            model_class, resource = ConvolutionalNeuralNetwork, 'keras'
        elif model_type == 'FullGP':
            model_class, resource = GaussianProcesses, 'GPy'
        elif model_type == 'SparseGP':
            model_class, resource = SparseGaussianProcesses, 'GPy'
        else:
            return False, Errors.NO_SUCH_MODEL_TYPE.value

        print('Getting dataset...')
        dataset = DatasetsApi.get_dataset(body, use_dataframe=True)
        print(dataset)

        if dataset is None:
            return False, Errors.NO_DATA.value

        # Only rows with a known pollutant value can be used for training.
        complete_dataset = dataset[dataset['Pollutant'].notnull()]

        # size=1 is passed and the test split is discarded: the whole dataset is meant for training.
        X_train, y_train, _, _, stats = MainTransformer.get_training_and_test_set(
            complete_dataset,
            'Pollutant',
            'Uncertainty',
            size=1,
            normalize=True)

        model = model_class()
        model.train(X_train, y_train, stats=stats)
        model_params, extra_params = model.model_to_json()

        # NOTE(review): the return value of upsert_model is not inspected, so a failed
        # upsert is still reported as success - confirm what upsert_model returns.
        DBManager.upsert_model(name,
                               model_type,
                               resource,
                               model_params=model_params,
                               extra_params=extra_params)
        return True, None