def get_single_instance_dataset(body, stats=None, prev=None):
    """
    Build a single-instance (one-row) dataset for CNN prediction.

    :param body: dict - request parameters; must contain 'date_time' (str),
        'longitude' (float) and 'latitude' (float); may contain
        body['data']['weather'] (dict) whose entries become extra columns
    :param stats: dict - normalization stats the model was trained with;
        when absent, normalization is delegated to MainTransformer
    :param prev: int - number of previous records to generate into the schema
    :return: normalized feature DataFrame ready for prediction, or None
        when validation of `body` fails
    """
    if not isinstance(body, dict):
        return None

    if 'date_time' not in body or not isinstance(body['date_time'], str):
        return None

    # NOTE(review): only exact floats pass here — integer coordinates are
    # rejected. Confirm this strictness is intended.
    if 'longitude' not in body or not isinstance(body['longitude'], float):
        return None

    if 'latitude' not in body or not isinstance(body['latitude'], float):
        return None

    # Column -> value mapping used to construct the one-row DataFrame.
    df_schema = {
        'DateTime': [body['date_time']],
        'Longitude': body['longitude'],
        'Latitude': body['latitude'],
        'Pollutant': None  # target column; unknown at prediction time
    }

    # Merge optional weather features as additional columns.
    if 'data' in body and 'weather' in body['data']:
        for key, value in body['data']['weather'].items():
            df_schema[key] = [value]

    if isinstance(prev, int):
        # Augment the schema with `prev` generated previous records.
        DatasetsApi.generate_previous_records(df_schema, prev, None)

    dataset = pandas.DataFrame(df_schema)
    # Normalize automatically only when no training stats were supplied.
    automatic_normalization = not isinstance(stats, dict)
    dataset.set_index(keys='DateTime', inplace=True)
    MainTransformer.periodic_f(dataset)
    X_predict, _, _, _, _ = MainTransformer.get_training_and_test_set(
        dataset,
        'Pollutant',
        'Uncertainty',
        size=1,
        normalize=automatic_normalization)

    if not automatic_normalization:
        MainTransformer.normalize(X_predict, stats=stats, inplace=True)

    return X_predict
def get_dataset(body, use_dataframe=True):
    """
    Fetch a dataset from the database for the requested locations and range.

    :param body: dict with required keys:
        * range - dict with 'start'/'end' datetime strings in format
          Day-Month-Year H:M (24H format)
        * locations - list of [longitude, latitude] pairs
        * pollutant - pollutant name, e.g. PM10, PM2.5
        and optional keys:
        * uncertainty - presence of this key enables uncertainty retrieval
        * data - dict with additional data such as data['weather'] (dict)
    :param use_dataframe: bool - return a DataFrame when True, a list otherwise
    :return: DataFrame | list | None (None when validation of `body` fails)
    """
    if not isinstance(body, dict):
        return None

    if 'range' not in body or 'locations' not in body or 'pollutant' not in body:
        return None

    if body['range'] is None or body['locations'] is None or body['pollutant'] is None:
        return None

    # Config for DBManager: describes the columns of the requested dataset.
    config_params = {
        "Date": DatasetsApi.DATE_TIME_FORMAT.split(' ')[0],
        "Time": DatasetsApi.DATE_TIME_FORMAT.split(' ')[1],
        "pollutant": {
            "Pollutant": None
        },
        'weather': {}
    }

    start_date = None
    end_date = None
    # NOTE(review): the mere *presence* of 'uncertainty' enables it — the
    # value is ignored, so {'uncertainty': False} still turns it on.
    # Confirm this is the intended API contract.
    uncertainty = 'uncertainty' in body

    if 'start' in body['range']:
        start_date = datetime.datetime.strptime(body['range']['start'],
                                                DatasetsApi.DATE_TIME_FORMAT)

    if 'end' in body['range']:
        end_date = datetime.datetime.strptime(body['range']['end'],
                                              DatasetsApi.DATE_TIME_FORMAT)

    location_coordinates = []
    if isinstance(body['locations'], list):
        location_coordinates = [(loc[0], loc[1]) for loc in body['locations']]

    if isinstance(body['pollutant'], str):
        config_params['pollutant']['Pollutant'] = body['pollutant']

    if 'data' in body and isinstance(body['data'], dict):
        if 'weather' in body['data'] and isinstance(body['data']['weather'], dict):
            config_params['weather'] = body['data']['weather']

    datasets = []
    for longitude, latitude in location_coordinates:
        dataset, err = DBManager.get_dataset(datetime_from=start_date,
                                             datetime_to=end_date,
                                             longitude=longitude,
                                             latitude=latitude,
                                             config=config_params,
                                             use_dataframe=use_dataframe,
                                             uncertainty=uncertainty)
        # Check err BEFORE touching `dataset`: the original computed
        # len(dataset.index) first, which raises AttributeError if
        # DBManager returns (None, err) on failure.
        if err is not None:
            continue
        dataset_size = len(dataset.index) if use_dataframe else len(dataset)
        if dataset_size != 0:
            datasets.append(dataset)

    if len(datasets) == 0:
        # TODO - IT IS VERY IMPORTANT TO CHANGE ALL CONDITIONS TO CHECK IF df.shape[0] == 0 IN THE API
        return pandas.DataFrame() if use_dataframe else []

    if use_dataframe:
        complete_dataset = pandas.concat(datasets)
        MainTransformer.periodic_f(complete_dataset)
    else:
        complete_dataset = []
        for partial in datasets:
            complete_dataset.extend(partial)

    return complete_dataset