def make_predictions(body, overwrite=False):
    """
    Function for making predictions over a time range and locations by a given model

    The dataset described by ``body`` is loaded, the rows to predict are selected,
    the features are normalized with the ``dataset_stats`` stored on the model, and
    every non-NaN prediction is inserted through
    ``DatasetsApi.insert_single_prediction``.

    :param body: dict - must contain 'name' (name of the model) and 'pollutant';
        may contain 'data', whose keys select which optional dataframe columns are
        stored with each prediction. The dict is also forwarded as-is to
        ``DatasetsApi.get_dataset``.
    :param overwrite: bool - when True every row of the dataset is predicted,
        otherwise only the rows whose 'Pollutant' value is null
    :return: bool, list - boolean whether it is successful and list with
        predictions and uncertainties
    """
    # Same request validation as make_single_prediction, so that a malformed
    # body yields (False, []) instead of a KeyError further down.
    if not isinstance(body, dict):
        return False, []
    if 'name' not in body or 'pollutant' not in body:
        return False, []

    dataset = DatasetsApi.get_dataset(body, use_dataframe=True)
    if dataset is None:
        return False, []

    # get dataset with empty pollutant values
    incomplete_dataset = dataset if overwrite else dataset[dataset['Pollutant'].isnull()]

    # split the dataset, do not normalize until means and stds are taken from the model
    X_predict, _, _, _, _ = MainTransformer.get_training_and_test_set(
        incomplete_dataset, 'Pollutant', 'Uncertainty', size=1, normalize=False)

    model, _, err = ModelApi.get_model_by_name(body['name'])
    predictions = []
    print(err)
    if err is not None:
        return False, predictions  # in case that model does not exist

    print('Verifying features...')
    if X_predict is None:
        # Nothing to predict on; bail out before touching X_predict.shape.
        print('Wrong number of features')
        return False, []
    if X_predict.shape[1] != model.n_features:
        print('Wrong number of features')
        # NOTE(review): the diagnostic prints shape[1] - 1 although the check
        # above compares shape[1] itself - confirm which one is intended.
        print(X_predict.shape[1] - 1)
        print(model.n_features)
        return False, []

    print('Checking model stats...')
    if 'dataset_stats' not in model.stats:
        return False, []
    training_dataset_stats = model.stats['dataset_stats']

    # The columns of the dataframe (DateTime aside) must be exactly the
    # features the model statistics were recorded for.
    feature_names = set(training_dataset_stats.keys())
    dataset_features = set(X_predict)
    dataset_features.discard('DateTime')
    print('Checking feature names...')
    if feature_names != dataset_features:
        return False, []

    print('Normalizing...')
    MainTransformer.normalize(X_predict, stats=training_dataset_stats, inplace=True)

    print('Preidicting...')
    predictions = model.predict(X_predict, uncertainty=True)

    # Bring the features back to their original scale before storing them.
    MainTransformer.unnormalize(X_predict, training_dataset_stats, inplace=True)
    MainTransformer.remove_periodic_f(X_predict)
    # Each prediction is indexed as (value, uncertainty).
    X_predict.loc[:, 'Pollutant'] = Series([x[0] for x in predictions], index=X_predict.index)
    X_predict.loc[:, 'Uncertainty'] = Series([x[1] for x in predictions], index=X_predict.index)

    # add predictions to the DB
    print('Done. Adding to database...')
    # 'data' is optional in the request; without it no optional column is stored.
    optional_data_keyset = set(body['data'].keys()) if 'data' in body else set()
    dataframe_optional_data = set(X_predict.keys()).difference(ModelApi.REQUIRED_FIELDS)
    keys_with_data_to_be_added = optional_data_keyset.intersection(dataframe_optional_data)

    results = []
    for index, row in X_predict.iterrows():
        # Skip rows for which the model produced NaN.
        if row['Pollutant'] is not None and math.isnan(row['Pollutant']):
            continue

        input_instance = {
            'date_time': index,
            'longitude': row['Longitude'],
            'latitude': row['Latitude'],
            'pollutant': body['pollutant'],
            'pollution_value': row['Pollutant'],
            'uncertainty': row['Uncertainty'],
            'data': {}
        }
        print(body['pollutant'])
        print(row['Pollutant'])

        for key in keys_with_data_to_be_added:
            input_instance['data'][key] = row[key]

        result = DatasetsApi.insert_single_prediction(input_instance)
        results.append(result)

    # NOTE(review): the double-underscore attribute only resolves through name
    # mangling, i.e. when this function is defined inside class ModelApi - confirm.
    predictions = ModelApi.__predictions_to_primitive_float(predictions)
    print('failed following: ')
    # An insert result is treated as failed when its first element is falsy.
    print([res for res in results if not res[0]])
    return True, predictions
def make_single_prediction(body):
    """
    Function for making a prediction with a given model on the dataset that
    ``DatasetsApi.get_single_instance_dataset`` builds from the request

    The resulting rows are predicted, un-normalized with the ``dataset_stats``
    stored on the model, and every non-NaN prediction is written through both
    ``DatasetsApi.insert_single_instance`` (with ``predicted=True``) and
    ``DatasetsApi.insert_single_prediction``.

    :param body: dict - must contain 'name' (name of the model) and 'pollutant';
        may contain 'data', whose keys select which optional dataframe columns are
        stored with each prediction. The dict is also forwarded as-is to
        ``DatasetsApi.get_single_instance_dataset``.
    :return: bool, list - boolean whether it is successful and list with
        predictions and uncertainties
    """
    if not isinstance(body, dict):
        return False, []
    if 'name' not in body or 'pollutant' not in body:
        return False, []

    model, _, err = ModelApi.get_model_by_name(body['name'])
    predictions = []
    if err is not None:
        return False, predictions  # in case that model does not exist

    # Only convolutional models expose a sequence length; it is handed to the
    # dataset builder as ``prev``.
    prev = model.seq_length if isinstance(model, ConvolutionalNeuralNetwork) else None

    if 'dataset_stats' not in model.stats:
        return False, []
    training_dataset_stats = model.stats['dataset_stats']

    X_predict = DatasetsApi.get_single_instance_dataset(
        body, stats=training_dataset_stats, prev=prev)
    if X_predict is None:
        return False, []

    # The columns of the dataframe (DateTime aside) must be exactly the
    # features the model statistics were recorded for.
    feature_names = set(training_dataset_stats.keys())
    dataset_features = set(X_predict)
    dataset_features.discard('DateTime')
    if feature_names != dataset_features:
        print(feature_names)
        print(dataset_features)
        return False, []

    predictions = model.predict(X_predict, uncertainty=True)

    # Bring the features back to their original scale before storing them.
    MainTransformer.unnormalize(X_predict, training_dataset_stats, inplace=True)
    MainTransformer.remove_periodic_f(X_predict)
    # Each prediction is indexed as (value, uncertainty).
    X_predict.loc[:, 'Pollutant'] = Series([x[0] for x in predictions], index=X_predict.index)
    X_predict.loc[:, 'Uncertainty'] = Series([x[1] for x in predictions], index=X_predict.index)

    # add predictions to the DB
    # 'data' is optional in the request; without it no optional column is stored.
    keys_with_data_to_be_added = set()
    if 'data' in body:
        optional_data_keyset = set(body['data'].keys())
        dataframe_optional_data = set(X_predict.keys()).difference(ModelApi.REQUIRED_FIELDS)
        keys_with_data_to_be_added = optional_data_keyset.intersection(dataframe_optional_data)

    results = []
    for index, row in X_predict.iterrows():
        # Skip rows for which the model produced NaN.
        if row['Pollutant'] is not None and math.isnan(row['Pollutant']):
            continue

        input_instance = {
            'date_time': index,
            'longitude': row['Longitude'],
            'latitude': row['Latitude'],
            'pollutant': body['pollutant'],
            'pollution_value': row['Pollutant'],
            'uncertainty': row['Uncertainty'],
            'data': {}
        }
        for key in keys_with_data_to_be_added:
            input_instance['data'][key] = row[key]

        # NOTE(review): the outcome of this insert has never been recorded in
        # the failure report (it used to be overwritten by the next call) -
        # confirm whether its failures should be reported as well.
        DatasetsApi.insert_single_instance(input_instance, predicted=True)
        result = DatasetsApi.insert_single_prediction(input_instance)
        results.append(result)

    # NOTE(review): the double-underscore attribute only resolves through name
    # mangling, i.e. when this function is defined inside class ModelApi - confirm.
    predictions = ModelApi.__predictions_to_primitive_float(predictions)
    print('failed following: ')
    # An insert result is treated as failed when its first element is falsy.
    print([res for res in results if not res[0]])
    return True, predictions