Code example #1
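Both snippets appear to be methods of a DatasetsApi class and rely on pandas, datetime, a DBManager and a MainTransformer helper that are not shown here. A minimal sketch of the assumed context (the import paths and the exact format string are guesses inferred from the docstrings):

    # assumed context for both code examples; import paths are guesses
    import datetime

    import pandas

    from db_manager import DBManager
    from main_transformer import MainTransformer

    # DatasetsApi is assumed to be the class that holds the two methods below,
    # with a class attribute along the lines of:
    #     DATE_TIME_FORMAT = '%d-%m-%Y %H:%M'   # "Day-Month-Year H:M (24H format)", per the docstring
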
    def get_single_instance_dataset(body, stats=None, prev=None):
        """
        Function for generating a single-instance dataset for the CNN model
        :param body: dict - parameters from the request
        :param stats: dict - normalization stats of the dataset the model was trained on
        :param prev: int - number of previous records to be generated
        :return: DataFrame with a single instance ready for prediction, or None if the body is invalid
        """
        if not isinstance(body, dict):
            return None

        if 'date_time' not in body or not isinstance(body['date_time'], str):
            return None

        if 'longitude' not in body or not isinstance(body['longitude'], float):
            return None

        if 'latitude' not in body or not isinstance(body['latitude'], float):
            return None

        # schema for a single-row DataFrame; every column holds a one-element list
        df_schema = {
            'DateTime': [body['date_time']],
            'Longitude': [body['longitude']],
            'Latitude': [body['latitude']],
            'Pollutant': [None]
        }

        if 'data' in body and 'weather' in body['data']:
            for key, value in body['data']['weather'].items():
                df_schema[key] = [value]

        if isinstance(prev, int):
            # presumably extends df_schema in place with `prev` previous records;
            # ready_data is None here because no pre-fetched data is available
            ready_data = None
            DatasetsApi.generate_previous_records(df_schema, prev, ready_data)

        dataset = pandas.DataFrame(df_schema)
        # normalize from scratch only when no training-time stats were supplied
        automatic_normalization = not isinstance(stats, dict)
        dataset.set_index(keys='DateTime', inplace=True)

        MainTransformer.periodic_f(dataset)
        X_predict, _, _, _, _ = MainTransformer.get_training_and_test_set(
            dataset,
            'Pollutant',
            'Uncertainty',
            size=1,
            normalize=automatic_normalization)

        if not automatic_normalization:
            MainTransformer.normalize(X_predict, stats=stats, inplace=True)

        return X_predict
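
For illustration, a hypothetical request body and call for the method above; the weather keys, coordinates and timestamp are made-up values, and the timestamp format follows the Day-Month-Year H:M convention described in the get_dataset docstring further down:

    body = {
        'date_time': '01-06-2021 14:00',
        'longitude': -1.4,      # must be a float, per the type checks above
        'latitude': 50.9,
        'data': {
            'weather': {'Temperature': 18.5, 'Humidity': 0.62}   # hypothetical weather fields
        }
    }

    # without training-time stats the instance is normalized from scratch
    X_predict = DatasetsApi.get_single_instance_dataset(body, stats=None, prev=None)
    if X_predict is not None:
        print(X_predict.head())
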
Code example #2
    def get_dataset(body, use_dataframe=True):
        """
        Function for getting a dataset from the database
        :param body: dict - requires several parameters:
          * type - type of the ML model (CNN, FullGP, etc.)
          * range - dict with start and end datetime strings in the format Day-Month-Year H:M (24H format)
          * locations - list of locations, where each location is a [longitude, latitude] pair
          * pollutant - name of the pollutant, e.g. PM10, PM2.5
          * data - dict with additional data such as weather data (data['weather'] is another dict)
        :param use_dataframe: bool - whether the returned dataset is a DataFrame or a list
        :return: DataFrame | List | None
        """

        if not isinstance(body, dict):
            return None

        if 'range' not in body or 'locations' not in body or 'pollutant' not in body:
            return None

        if body['range'] is None or body['locations'] is None or body['pollutant'] is None:
            return None

        # Stricter validation of 'range' and 'locations', currently disabled:
        # if not isinstance(body['range'], dict):
        #     return None
        #
        # if not isinstance(body['locations'], list):
        #     return None
        # else:
        #     result = list(filter(lambda c: not isinstance(c, list) or len(c) != 2, body['locations']))
        #     if len(result) != 0 and len(body['locations']) != 0:
        #         return None

        # Params required by the DBManager; acts as the config of the requested dataset
        config_params = {
            "Date": DatasetsApi.DATE_TIME_FORMAT.split(' ')[0],
            "Time": DatasetsApi.DATE_TIME_FORMAT.split(' ')[1],
            "pollutant": {
                "Pollutant": None
            },
            'weather': {}
        }

        start_date = None
        end_date = None
        uncertainty = False

        if 'start' in body['range']:
            start_date = datetime.datetime.strptime(
                body['range']['start'], DatasetsApi.DATE_TIME_FORMAT)

        if 'end' in body['range']:
            end_date = datetime.datetime.strptime(body['range']['end'],
                                                  DatasetsApi.DATE_TIME_FORMAT)

        if 'uncertainty' in body:
            # the mere presence of the key enables uncertainty, regardless of its value
            uncertainty = True

        location_coordinates = []
        if isinstance(body['locations'], list):
            location_coordinates = list(
                map(lambda x: (x[0], x[1]), body['locations']))

        if isinstance(body['pollutant'], str):
            config_params['pollutant']['Pollutant'] = body['pollutant']

        if 'data' in body and isinstance(body['data'], dict):
            if 'weather' in body['data'] and isinstance(
                    body['data']['weather'], dict):
                config_params['weather'] = body['data']['weather']

        datasets = []

        for coordinates_pair in location_coordinates:
            dataset, err = DBManager.get_dataset(datetime_from=start_date,
                                                 datetime_to=end_date,
                                                 longitude=coordinates_pair[0],
                                                 latitude=coordinates_pair[1],
                                                 config=config_params,
                                                 use_dataframe=use_dataframe,
                                                 uncertainty=uncertainty)

            dataset_size = len(
                dataset.index) if use_dataframe else len(dataset)

            if err is None and dataset_size != 0:
                datasets.append(dataset)

        if len(datasets) == 0:
            # TODO - IT IS VERY IMPORTANT TO CHANGE ALL CONDITIONS TO CHECK IF df.shape[0] == 0 IN THE API
            return pandas.DataFrame() if use_dataframe else []

        if use_dataframe:
            complete_dataset = pandas.concat(datasets)
            MainTransformer.periodic_f(complete_dataset)
        else:
            complete_dataset = []
            for x in datasets:
                complete_dataset.extend(x)

        return complete_dataset
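
Again for illustration only, a hypothetical request body for get_dataset that follows the structure described in its docstring; the pollutant, coordinates and date range are made up, and the meaning of the weather values forwarded to DBManager is assumed:

    body = {
        'range': {
            'start': '01-06-2021 00:00',   # Day-Month-Year H:M
            'end': '07-06-2021 23:00'
        },
        'locations': [[-1.4, 50.9], [-1.5, 50.95]],   # [longitude, latitude] pairs
        'pollutant': 'PM10',
        'data': {
            'weather': {'Temperature': None, 'Humidity': None}   # forwarded to DBManager as part of the config
        }
    }

    df = DatasetsApi.get_dataset(body, use_dataframe=True)
    if df is not None:
        print(df.shape)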