def make_prediction(enter_chan, enter_time) -> dict:
    """Forecast one channel with ``sarima_forecast`` and its stored configuration.

    Args:
        enter_chan: Channel identifier. It is passed to the configuration
            lookup and compared against the ``channel_id`` column of the
            hourly data.
        enter_time: Forecast start as "/"-separated integers. Five zeros are
            appended before the value goes to ``time.mktime``, which needs
            nine fields in total, so four fields are expected (presumably
            year/month/day/hour -- confirm with callers).

    Returns:
        A dict with the keys 'prediction time' (the start label string),
        'predictions' and 'prediction_ci' (both converted with ``.tolist()``).
    """
    date_parts = tuple(int(piece) for piece in enter_time.split("/"))
    # Pad the parsed fields with five zeros to build the tuple mktime needs.
    padded_parts = date_parts + (0,) * 5
    local_start = datetime.datetime.fromtimestamp(time.mktime(padded_parts))

    best_config = datamanagement.get_channel_best_configurations(enter_chan)
    forecast_start = pd.to_datetime(local_start)

    # Keep only the rows of the requested channel, then fill gaps and
    # index them by datetime.
    every_channel = datamanagement.get_all_channels_hourly_data()
    channel_rows = every_channel.loc[every_channel.channel_id == enter_chan]
    channel_series = fill_gaps_and_set_datetime(channel_rows)

    # The history window is a fixed 84 days ending at the forecast start.
    # '+3:00' is a hard-coded UTC offset suffix; presumably the cleaned
    # index carries that offset -- confirm against
    # fill_gaps_and_set_datetime.
    window_begin = str(forecast_start - datetime.timedelta(days=84)) + '+3:00'
    window_end = str(forecast_start) + '+3:00'
    history = channel_series.loc[window_begin:window_end].values

    yhat, yhat_ci = sarima_forecast(history, best_config)

    results = {
        'prediction time': window_end,
        'predictions': yhat.tolist(),
        'prediction_ci': yhat_ci.tolist()
    }
    _logger.info(f'Predictions: {results}')
    return results
def make_prediction_using_averages_for_all_locations(entered_chan,
                                                     entered_time,
                                                     entered_latitude,
                                                     entered_longitude):
    """
    Build an averages-based forecast for one channel starting at entered_time.

    Nothing is saved to the database here: the save call is commented out.
    The function does write two debug CSV files to the working directory
    and prints progress to stdout.

    Returns ``{'predictions': [...]}`` with one dict per forecast value,
    keyed 'prediction_time', 'prediction_value', 'lower_ci' and 'upper_ci'.
    If the channel has no hourly data, no stored configuration, or no rows
    inside the look-back window, an error dict is appended to a local list
    that is never read and the function returns ``None``.
    """
    skipped_channels = []
    forecast_origin = pd.to_datetime(entered_time)
    best_configs = datamanagement.get_channel_best_configurations(
        entered_chan)
    window_anchor = pd.to_datetime(forecast_origin)

    # list of forecast hours derived from the start time
    fcst_hours, start_hour = processing.forecast_hours(forecast_origin)

    selected_channel_id = int(entered_chan)
    raw_rows = datamanagement.get_channel_data_raw(selected_channel_id)
    hourly_rows = datamanagement.calculate_hourly_averages(raw_rows)

    if hourly_rows.empty:
        skipped_channels.append({
            'predictions': 0,
            'channel_id': selected_channel_id,
            'errors': 'no data available for this channel'
        })
        return None

    cleaned_rows = fill_gaps_and_set_datetime(hourly_rows)
    # debug dump of the cleaned series, one file per channel
    cleaned_rows.to_csv('all_channel_data_clean' + str(selected_channel_id) +
                        '.csv')

    if not best_configs:
        skipped_channels.append({
            'predictions': 0,
            'errors': 'no configurations for this location'
        })
        return None

    chosen_config = best_configs[0]
    best_number_of_days_back = int(chosen_config.get('number_of_days_to_use'))
    considered_hours = int(chosen_config.get('considered_hours'))

    # Window labels carry a hard-coded '+3:00' offset suffix.
    window_begin = str(window_anchor - datetime.timedelta(
        days=best_number_of_days_back)) + '+3:00'
    window_end = str(window_anchor) + '+3:00'
    print(window_begin + '\t\t start pred:' + window_end)

    # final range of data used for the forecast
    data_to_use = cleaned_rows.loc[window_begin:window_end].values
    pd.DataFrame(data_to_use).to_csv("data_to_use" +
                                     str(selected_channel_id) + ".csv")
    print('length of data to use: \n')
    print(len(data_to_use))

    if len(data_to_use) == 0:
        skipped_channels.append({
            'predictions': 0,
            'errors':
            'no predictions, check status of the device at this location',
            'channel_id': selected_channel_id
        })
        return None

    # mean, lower ci, upper ci
    yhat_24, lower_ci, upper_ci = simple_forecast_ci(
        data_to_use, best_number_of_days_back, considered_hours)

    model_name = 'simple_average_prediction'
    location_name = " "
    location_latitude = float(entered_latitude)
    location_longitude = float(entered_longitude)
    prediction_start_datetime = pd.to_datetime(window_end)
    created_at = datetime.datetime.now()

    # storage_rows has the shape of the commented-out save call's
    # argument; it is built but never used.
    storage_rows = []
    result_modified = []
    for position in range(len(yhat_24)):
        # each forecast value is stamped (position + 1) hours after the start
        stamped_at = pd.to_datetime(
            prediction_start_datetime +
            datetime.timedelta(hours=position + 1))
        value = yhat_24[position]
        lower_bound = lower_ci[position]
        upper_bound = upper_ci[position]

        result_modified.append({
            'prediction_time': stamped_at,
            'prediction_value': value,
            'lower_ci': lower_bound,
            'upper_ci': upper_bound
        })
        storage_rows.append(
            (model_name, selected_channel_id, location_name,
             location_latitude, location_longitude, value,
             prediction_start_datetime, stamped_at, lower_bound,
             upper_bound, created_at))

    results = {
        'prediction_start_time': window_end,
        'prediction_hours': fcst_hours,
        'predictions': yhat_24,
        'prediction_upper_ci': upper_ci,
        'prediction_lower_ci': lower_ci
    }
    formated_results = {'predictions': result_modified}
    print(formated_results)

    _logger.info(f'Predictions: {results}')
    return formated_results
def make_prediction_using_averages(entered_chan, entered_time,
                                   entered_latitude, entered_longitude):
    """
    Forecast one channel from historical averages and save the predictions.

    The channel's raw data is fetched and averaged per hour. The look-back
    window named by the channel's stored best configuration is passed to
    ``simple_forecast_ci``. Every forecast value is stamped one hour after
    the previous one, starting one hour after ``entered_time``. The rows
    are saved with ``datamanagement.save_predictions``.

    Args:
        entered_chan: Channel identifier; converted with ``int()``.
        entered_time: Forecast start; parsed with ``pd.to_datetime``.
        entered_latitude: Location latitude; converted with ``float()``.
        entered_longitude: Location longitude; converted with ``float()``.

    Returns:
        ``{'predictions': [...]}`` with one dict per forecast value, keyed
        'prediction_time', 'prediction_value', 'lower_ci' and 'upper_ci'.
        ``{'predictions': 0}`` when the channel has no hourly data.
        ``{'predictions': 0, 'errors': 'no configurations for this
        location'}`` when no best configuration is stored for the channel.
    """
    best_configs = datamanagement.get_channel_best_configurations(
        entered_chan)
    start_pred = pd.to_datetime(entered_time)

    # list of forecast hours derived from the start time
    fcst_hours, _ = processing.forecast_hours(entered_time)

    selected_channel_id = int(entered_chan)
    raw_data = datamanagement.get_channel_data_raw(selected_channel_id)
    hourly_data = datamanagement.calculate_hourly_averages(raw_data)

    if hourly_data.empty:
        return {'predictions': 0}

    if not best_configs:
        # The previous code indexed the empty/None configuration here and
        # never bound considered_hours, so this path always raised. Report
        # the condition with the payload that
        # make_prediction_using_averages_for_all_locations builds.
        _logger.warning('No best configuration for channel %s',
                        selected_channel_id)
        return {
            'predictions': 0,
            'errors': 'no configurations for this location'
        }

    best_config = best_configs[0]
    best_number_of_days_back = int(best_config.get('number_of_days_to_use'))
    considered_hours = int(best_config.get('considered_hours'))

    all_channel_data_clean = fill_gaps_and_set_datetime(hourly_data)

    # Window labels carry a hard-coded '+3:00' offset suffix; presumably the
    # cleaned index uses that offset -- confirm against
    # fill_gaps_and_set_datetime.
    start_base_time = str(start_pred - datetime.timedelta(
        days=best_number_of_days_back)) + '+3:00'
    start_pred_label = str(start_pred) + '+3:00'

    # final range of data used for the forecast
    data_to_use = all_channel_data_clean.loc[
        start_base_time:start_pred_label].values

    # mean, lower ci, upper ci
    yhat_24, lower_ci, upper_ci = simple_forecast_ci(
        data_to_use, best_number_of_days_back, considered_hours)

    model_name = 'simple_average_prediction'
    location_name = " "
    location_latitude = float(entered_latitude)
    location_longitude = float(entered_longitude)
    prediction_start_datetime = pd.to_datetime(start_pred_label)
    created_at = datetime.datetime.now()

    model_predictions = []
    result_modified = []
    # Index-based access is kept on purpose: the container types returned by
    # simple_forecast_ci are not visible here.
    for i in range(len(yhat_24)):
        prediction_value = yhat_24[i]
        lower_value = lower_ci[i]
        upper_value = upper_ci[i]
        prediction_datetime = pd.to_datetime(
            prediction_start_datetime + datetime.timedelta(hours=i + 1))

        result_modified.append({
            'prediction_time': prediction_datetime,
            'prediction_value': prediction_value,
            'lower_ci': lower_value,
            'upper_ci': upper_value
        })
        # field order must stay as datamanagement.save_predictions expects
        model_predictions.append(
            (model_name, selected_channel_id, location_name,
             location_latitude, location_longitude, prediction_value,
             prediction_start_datetime, prediction_datetime, lower_value,
             upper_value, created_at))

    results = {
        'prediction_start_time': start_pred_label,
        'prediction_hours': fcst_hours,
        'predictions': yhat_24,
        'prediction_upper_ci': upper_ci,
        'prediction_lower_ci': lower_ci
    }
    formated_results = {'predictions': result_modified}

    datamanagement.save_predictions(model_predictions)
    _logger.info(f'Predictions: {results}')
    return formated_results