Python MainTransformer.normalize Examples

Programming Language: Python

Namespace/Package Name: airpyllution

Class/Type: MainTransformer

Method/Function: normalize

Examples at hotexamples.com: 3

Python MainTransformer.normalize - 3 examples found. These are the top-rated real-world Python examples of airpyllution.MainTransformer.normalize extracted from open source projects. You can rate examples to help us improve the quality of the examples.

Frequently Used Methods

Show Hide

get_training_and_test_set(4)

normalize(3)

MainTransformer(2)

add_transformer(2)

get_dataset(2)

periodic_f(2)

remove_periodic_f(2)

transform(2)

unnormalize(2)

normalize_with_old_stats(1)

remove_features(1)

Example #1

Show file

File: ModelAPI.py Project: imladenov1997/airpollutionps

    def make_predictions(body, overwrite=False):
        """
        Function for making predictions over a time range and locations by a given model

        The dataset described by ``body`` is loaded, normalized with the
        ``dataset_stats`` stored on the model, passed to ``model.predict`` and
        every predicted row is handed to ``DatasetsApi.insert_single_prediction``.

        :param body: dict - request parameters; this function reads body['name']
            (name of the model), body['pollutant'] and body['data'], and passes
            the whole dict to DatasetsApi.get_dataset
        :param overwrite: bool - when True every row of the dataset is predicted,
            otherwise only the rows whose 'Pollutant' value is null
        :return: bool, list - boolean whether it is successful and list with predictions and uncertainties
        """
        dataset = DatasetsApi.get_dataset(body, use_dataframe=True)

        if dataset is None:
            return False, []

        # get dataset with empty pollutant values
        if overwrite:
            incomplete_dataset = dataset
        else:
            incomplete_dataset = dataset[dataset['Pollutant'].isnull()]

        # split the dataset, do not normalize until means and stds are taken from the model
        X_predict, _, _, _, _ = MainTransformer.get_training_and_test_set(
            incomplete_dataset,
            'Pollutant',
            'Uncertainty',
            size=1,
            normalize=False)

        model, _, err = ModelApi.get_model_by_name(body['name'])
        print(err)
        if err is not None:
            return False, []  # in case that model does not exist

        print('Verifying features...')
        if X_predict is None or X_predict.shape[1] != model.n_features:
            print('Wrong number of features')
            # X_predict may be None here, so only report its width when it exists
            if X_predict is not None:
                print(X_predict.shape[1] - 1)
            print(model.n_features)
            return False, []

        print('Checking model stats...')
        if 'dataset_stats' not in model.stats:
            # without the training statistics the input cannot be normalized
            return False, []

        training_dataset_stats = model.stats['dataset_stats']
        feature_names = set(training_dataset_stats.keys())
        dataset_features = set(X_predict)
        dataset_features.discard('DateTime')

        print('Checking feature names...')
        if feature_names != dataset_features:
            return False, []

        print('Normalizing...')
        MainTransformer.normalize(X_predict,
                                  stats=training_dataset_stats,
                                  inplace=True)

        print('Preidicting...')
        predictions = model.predict(X_predict, uncertainty=True)

        # NOTE(review): presumably reverses the normalization above so that the
        # stored rows hold raw feature values - confirm in MainTransformer
        MainTransformer.unnormalize(X_predict,
                                    training_dataset_stats,
                                    inplace=True)
        MainTransformer.remove_periodic_f(X_predict)

        # each prediction is indexed as [0] -> value, [1] -> uncertainty
        X_predict.loc[:, 'Pollutant'] = Series([x[0] for x in predictions],
                                               index=X_predict.index)
        X_predict.loc[:, 'Uncertainty'] = Series([x[1] for x in predictions],
                                                 index=X_predict.index)

        # add predictions to the DB
        print('Done. Adding to database...')
        # extra columns are stored only when they are both named in body['data']
        # and present in the frame outside of ModelApi.REQUIRED_FIELDS
        optional_data_keyset = set(body['data'].keys())
        dataframe_optional_data = set(X_predict.keys()).difference(
            ModelApi.REQUIRED_FIELDS)
        keys_with_data_to_be_added = optional_data_keyset.intersection(
            dataframe_optional_data)

        results = []
        for index, row in X_predict.iterrows():
            pollution_value = row['Pollutant']
            # rows for which no numeric prediction was produced are not stored
            if pollution_value is not None and math.isnan(pollution_value):
                continue

            input_instance = {
                'date_time': index,
                'longitude': row['Longitude'],
                'latitude': row['Latitude'],
                'pollutant': body['pollutant'],
                'pollution_value': pollution_value,
                'uncertainty': row['Uncertainty'],
                'data': {key: row[key] for key in keys_with_data_to_be_added}
            }

            print(body['pollutant'])
            print(pollution_value)

            results.append(
                DatasetsApi.insert_single_prediction(input_instance))

        # NOTE(review): the double-underscore name is mangled by Python, so this
        # call only resolves while this method lives inside class ModelApi
        predictions = ModelApi.__predictions_to_primitive_float(
            predictions)
        print('failed following: ')
        # first element of each insert result is used as its success flag
        print([result for result in results if not result[0]])

        return True, predictions

Example #2

Show file

    def get_single_instance_dataset(body, stats=None, prev=None):
        """
        Function for generating a single instance dataset for CNN
        :param body: dict - parameters from request; must contain 'date_time' (str),
            'longitude' (float) and 'latitude' (float); every entry of the optional
            body['data']['weather'] dict becomes an extra column
        :param stats: dict - stats for dataset normalization on which model was trained;
            when it is not a dict, normalization is left to
            MainTransformer.get_training_and_test_set
        :param prev: int - number of previous records to be generated
        :return: None when body fails validation, otherwise the first value returned
            by MainTransformer.get_training_and_test_set (the feature set)
        """
        if not isinstance(body, dict):
            return None

        # dict.get yields None for a missing key, which fails the type check as well
        if not isinstance(body.get('date_time'), str):
            return None

        if not isinstance(body.get('longitude'), float):
            return None

        if not isinstance(body.get('latitude'), float):
            return None

        df_schema = {
            'DateTime': [body['date_time']],
            'Longitude': body['longitude'],
            'Latitude': body['latitude'],
            'Pollutant': None
        }

        # weather data is optional; anything that is not a dict is treated as absent
        optional_data = body.get('data')
        weather = optional_data.get('weather') if isinstance(
            optional_data, dict) else None
        if isinstance(weather, dict):
            for key, value in weather.items():
                df_schema[key] = [value]

        if isinstance(prev, int):
            # NOTE(review): the third argument has always been None here - confirm
            # what generate_previous_records expects for it
            DatasetsApi.generate_previous_records(df_schema, prev, None)

        dataset = pandas.DataFrame(df_schema)
        # normalize automatically only when no stats dict was supplied
        automatic_normalization = not isinstance(stats, dict)
        dataset.set_index(keys='DateTime', inplace=True)

        MainTransformer.periodic_f(dataset)
        X_predict, _, _, _, _ = MainTransformer.get_training_and_test_set(
            dataset,
            'Pollutant',
            'Uncertainty',
            size=1,
            normalize=automatic_normalization)

        if not automatic_normalization:
            # caller supplied the training stats, so normalize with exactly those
            MainTransformer.normalize(X_predict, stats=stats, inplace=True)

        return X_predict

Example #3

Show file

File: ModelAPI.py Project: imladenov1997/airpollutionps

    def train_model(model_name, body):
        """
        Function for further training a model provided that the model already exists in the DB

        Only rows with a non-null 'Pollutant' value are used. The model must
        already carry 'n_instances_trained' and 'dataset_stats' in its stats;
        otherwise nothing is trained.

        :param model_name: str - name of the existing model
        :param body: dict - body of the request, passed to DatasetsApi.get_dataset
        :return: (True, None) | (False, str) | (False, list) - on the success path
            this is whatever DBManager.upsert_model returns
        """
        print('Getting dataset...')
        model, model_record, err = ModelApi.get_model_by_name(model_name)

        if model is None:
            return False, err

        dataset = DatasetsApi.get_dataset(body, use_dataframe=True)
        if dataset is None:
            return False, Errors.NO_DATA.value

        # Take an explicit copy: the selection is normalized in place below, and
        # mutating a boolean-mask selection of another frame is the pandas
        # chained-assignment hazard (SettingWithCopyWarning)
        complete_dataset = dataset[dataset['Pollutant'].notnull()].copy()

        if 'n_instances_trained' not in model.stats or 'dataset_stats' not in model.stats:
            return False, []

        # NOTE(review): updated_stats is used to normalize the new data and
        # new_stats is handed to model.train - confirm their exact meaning in
        # MainTransformer.normalize_with_old_stats
        updated_stats, stats = MainTransformer.normalize_with_old_stats(
            model.stats['n_instances_trained'],
            model.stats['dataset_stats'], complete_dataset)
        MainTransformer.normalize(complete_dataset,
                                  stats=updated_stats,
                                  inplace=True)

        # data is already normalized above, so the split must not normalize again
        X_train, y_train, _, _, _ = MainTransformer.get_training_and_test_set(
            complete_dataset,
            'Pollutant',
            'Uncertainty',
            size=1,
            normalize=False)

        print('Verifying dataset...')
        # presence of 'dataset_stats' was already established by the guard above
        training_dataset_stats = model.stats['dataset_stats']
        feature_names = set(training_dataset_stats.keys())
        dataset_features = set(X_train)
        dataset_features.discard('DateTime')

        print('Verifying dataset features')
        if feature_names != dataset_features:
            print('feature names', feature_names, training_dataset_stats,
                  training_dataset_stats.keys())
            print('dataset features', dataset_features)

            if not feature_names.issubset(dataset_features):
                # the dataset lacks at least one feature the model was trained on
                print(feature_names)
                print(dataset_features)
                return False, []

            # the dataset only has surplus features: drop them and carry on
            print('Dataset is in the expected shape')
            print('difference')
            difference = dataset_features.difference(feature_names)
            print(difference)
            MainTransformer.remove_features(X_train, difference)

        print('Starting to train model...')
        model.train(X_train, y_train, stats=stats)
        model_params, extra_params = model.model_to_json()
        result = DBManager.upsert_model(model_name,
                                        model_record.type,
                                        model_record.resource,
                                        model_params=model_params,
                                        extra_params=extra_params)
        print(result)
        return result