Ejemplo n.º 1
0
def make_prediction(enter_chan, enter_time) -> dict:
    """Forecast one channel with SARIMA using its stored best configuration.

    Args:
        enter_chan: Channel identifier. It is used to look up the best
            configuration and is compared against the ``channel_id`` column
            of the hourly data.
        enter_time: Forecast start as "/"-separated integers. Five zeros are
            appended before the tuple is handed to ``time.mktime``, so
            presumably four fields (year/month/day/hour) are expected --
            TODO confirm with callers.

    Returns:
        A dict with the keys 'prediction time' (the start time as a string
        with a '+3:00' suffix), 'predictions' and 'prediction_ci' (the two
        values returned by ``sarima_forecast``, converted with ``tolist()``).
    """
    # Parse the entered fields and pad them with zeros to build the time
    # tuple that time.mktime converts to a local timestamp.
    parsed_fields = tuple(int(field) for field in enter_time.split("/"))
    time_tuple = parsed_fields + (0, 0, 0, 0, 0)
    forecast_start_dt = datetime.datetime.fromtimestamp(
        time.mktime(time_tuple))

    best_config = datamanagement.get_channel_best_configurations(enter_chan)

    forecast_start = pd.to_datetime(forecast_start_dt)

    # Restrict the hourly data to the requested channel, then let
    # fill_gaps_and_set_datetime clean it before slicing by time.
    hourly = datamanagement.get_all_channels_hourly_data()
    channel_rows = hourly.loc[hourly.channel_id == enter_chan]
    channel_series = fill_gaps_and_set_datetime(channel_rows)

    # The history window covers the 84 days leading up to the forecast
    # start. Both bounds get a '+3:00' suffix -- presumably to match a
    # timezone-aware index; TODO confirm.
    offset_suffix = '+3:00'
    window_start = (str(forecast_start - datetime.timedelta(days=84)) +
                    offset_suffix)
    window_end = str(forecast_start) + offset_suffix
    history = channel_series.loc[window_start:window_end].values

    yhat, yhat_ci = sarima_forecast(history, best_config)

    results = {'prediction time': window_end}
    results['predictions'] = yhat.tolist()
    results['prediction_ci'] = yhat_ci.tolist()

    _logger.info(f'Predictions: {results}')

    return results
Ejemplo n.º 2
0
def make_prediction_using_averages_for_all_locations(entered_chan,
                                                     entered_time,
                                                     entered_latitude,
                                                     entered_longitude):
    """
        Generate predictions for one channel from its historical averages.

        The channel's raw data is averaged per hour, cleaned, and sliced to
        the window ending at ``entered_time`` whose length comes from the
        channel's best configuration. ``simple_forecast_ci`` then produces
        the predicted values and their confidence bounds; prediction ``i``
        is timestamped ``i + 1`` hours after the start time.

        Persisting the predictions is currently disabled in this function
        (see the TODO near the end), so nothing is written to the database.

        Args:
            entered_chan: Channel identifier; converted with ``int``.
            entered_time: Forecast start, anything ``pd.to_datetime`` accepts.
            entered_latitude: Location latitude; converted with ``float``.
            entered_longitude: Location longitude; converted with ``float``.

        Returns:
            On success ``{'predictions': [...]}`` where each item is a dict
            with 'prediction_time', 'prediction_value', 'lower_ci' and
            'upper_ci'. When no forecast can be produced, a dict with
            ``'predictions': 0`` and an ``'errors'`` message describing why
            (no channel data, no configuration, or an empty data window).
    """
    start_pred = pd.to_datetime(entered_time)
    current_best_config = datamanagement.get_channel_best_configurations(
        entered_chan)
    _logger.debug('Best configuration for channel %s: %s', entered_chan,
                  current_best_config)

    # generating list of hours based on start time
    fcst_hours, _ = processing.forecast_hours(start_pred)
    selected_channel_id = int(entered_chan)

    data = datamanagement.get_channel_data_raw(selected_channel_id)
    all_channel_data = datamanagement.calculate_hourly_averages(data)

    # Each failure below is returned to the caller; previously the error
    # dict was appended to a throwaway list and the function returned None.
    if all_channel_data.empty:
        return {
            'predictions': 0,
            'channel_id': selected_channel_id,
            'errors': 'no data available for this channel'
        }

    all_channel_data_clean = fill_gaps_and_set_datetime(all_channel_data)
    # NOTE(review): this CSV dump looks like a debugging aid written to the
    # working directory on every call -- confirm nothing consumes it before
    # removing.
    all_channel_data_clean.to_csv('all_channel_data_clean' +
                                  str(selected_channel_id) + '.csv')

    if not current_best_config:
        return {
            'predictions': 0,
            'errors': 'no configurations for this location'
        }

    best_number_of_days_back = int(
        current_best_config[0].get('number_of_days_to_use'))
    considered_hours = int(current_best_config[0].get('considered_hours'))

    # Window bounds are strings with a '+3:00' suffix -- presumably to match
    # a timezone-aware index on the cleaned data; TODO confirm.
    window_start = str(start_pred - datetime.timedelta(
        best_number_of_days_back)) + '+3:00'
    window_end = str(start_pred) + '+3:00'
    _logger.debug('Data window start: %s, prediction start: %s',
                  window_start, window_end)

    # select final range of data to be used in forecast
    data_to_use = all_channel_data_clean.loc[window_start:window_end].values
    # NOTE(review): second debugging dump, see the note above.
    pd.DataFrame(data_to_use).to_csv("data_to_use" +
                                     str(selected_channel_id) + ".csv")
    _logger.debug('Length of data to use: %d', len(data_to_use))

    if len(data_to_use) == 0:
        return {
            'predictions': 0,
            'errors':
            'no predictions, check status of the device at this location',
            'channel_id': selected_channel_id
        }

    # generating mean, lower ci, upper ci
    yhat_24, lower_ci, upper_ci = simple_forecast_ci(
        data_to_use, best_number_of_days_back, considered_hours)

    model_name = 'simple_average_prediction'
    location_name = " "
    location_latitude = float(entered_latitude)
    location_longitude = float(entered_longitude)
    prediction_start_datetime = pd.to_datetime(window_end)
    created_at = datetime.datetime.now()

    model_predictions = []
    result_modified = []
    for i, prediction_value in enumerate(yhat_24):
        # Prediction i refers to the (i + 1)-th hour after the start time.
        prediction_datetime = pd.to_datetime(
            prediction_start_datetime + datetime.timedelta(hours=i + 1))
        lower_value = lower_ci[i]
        upper_value = upper_ci[i]

        result_modified.append({
            'prediction_time': prediction_datetime,
            'prediction_value': prediction_value,
            'lower_ci': lower_value,
            'upper_ci': upper_value
        })
        # Row layout kept for the disabled save call below.
        model_predictions.append(
            (model_name, selected_channel_id, location_name,
             location_latitude, location_longitude, prediction_value,
             prediction_start_datetime, prediction_datetime, lower_value,
             upper_value, created_at))

    results = {
        'prediction_start_time': window_end,
        'prediction_hours': fcst_hours,
        'predictions': yhat_24,
        'prediction_upper_ci': upper_ci,
        'prediction_lower_ci': lower_ci
    }

    formated_results = {'predictions': result_modified}
    _logger.debug('Formatted predictions: %s', formated_results)
    # TODO(review): persistence is disabled here; re-enable with
    # datamanagement.save_predictions_all(model_predictions) once confirmed.
    _logger.info(f'Predictions: {results}')
    return formated_results
Ejemplo n.º 3
0
def make_prediction_using_averages(entered_chan, entered_time,
                                   entered_latitude, entered_longitude):
    """
        Generate predictions for one channel from its historical averages,
        save them with ``datamanagement.save_predictions`` and return them.

        The channel's raw data is averaged per hour, cleaned, and sliced to
        the window ending at ``entered_time`` whose length comes from the
        channel's best configuration. ``simple_forecast_ci`` then produces
        the predicted values and their confidence bounds; prediction ``i``
        is timestamped ``i + 1`` hours after the start time.

        Args:
            entered_chan: Channel identifier; converted with ``int``.
            entered_time: Forecast start, anything ``pd.to_datetime`` accepts.
            entered_latitude: Location latitude; converted with ``float``.
            entered_longitude: Location longitude; converted with ``float``.

        Returns:
            On success ``{'predictions': [...]}`` where each item is a dict
            with 'prediction_time', 'prediction_value', 'lower_ci' and
            'upper_ci'. ``{'predictions': 0}`` when the channel has no data,
            and ``{'predictions': 0, 'errors': ...}`` when the channel has no
            best configuration.
    """
    current_best_config = datamanagement.get_channel_best_configurations(
        entered_chan)

    start_pred = pd.to_datetime(entered_time)
    # generating list of hours based on start time
    fcst_hours, _ = processing.forecast_hours(entered_time)
    selected_channel_id = int(entered_chan)
    data = datamanagement.get_channel_data_raw(selected_channel_id)

    all_channel_data = datamanagement.calculate_hourly_averages(data)
    if all_channel_data.empty:
        return {'predictions': 0}

    # Without a configuration neither the window length nor the considered
    # hours are known. The previous code indexed the empty configuration in
    # this case and always raised, so report the problem explicitly instead.
    if not current_best_config:
        return {
            'predictions': 0,
            'errors': 'no configurations for this location'
        }

    all_channel_data_clean = fill_gaps_and_set_datetime(all_channel_data)

    best_number_of_days_back = int(
        current_best_config[0].get('number_of_days_to_use'))
    considered_hours = int(current_best_config[0].get('considered_hours'))

    # Window bounds are strings with a '+3:00' suffix -- presumably to match
    # a timezone-aware index on the cleaned data; TODO confirm.
    window_start = str(start_pred - datetime.timedelta(
        best_number_of_days_back)) + '+3:00'
    window_end = str(start_pred) + '+3:00'

    # select final range of data to be used in forecast
    data_to_use = all_channel_data_clean.loc[window_start:window_end].values

    # generating mean, lower ci, upper ci
    yhat_24, lower_ci, upper_ci = simple_forecast_ci(
        data_to_use, best_number_of_days_back, considered_hours)

    model_name = 'simple_average_prediction'
    location_name = " "
    location_latitude = float(entered_latitude)
    location_longitude = float(entered_longitude)
    prediction_start_datetime = pd.to_datetime(window_end)
    created_at = datetime.datetime.now()

    model_predictions = []
    result_modified = []
    for i, prediction_value in enumerate(yhat_24):
        # Prediction i refers to the (i + 1)-th hour after the start time.
        prediction_datetime = pd.to_datetime(
            prediction_start_datetime + datetime.timedelta(hours=i + 1))
        lower_value = lower_ci[i]
        upper_value = upper_ci[i]

        result_modified.append({
            'prediction_time': prediction_datetime,
            'prediction_value': prediction_value,
            'lower_ci': lower_value,
            'upper_ci': upper_value
        })
        # One row per predicted hour, in the field order handed to
        # datamanagement.save_predictions.
        model_predictions.append(
            (model_name, selected_channel_id, location_name,
             location_latitude, location_longitude, prediction_value,
             prediction_start_datetime, prediction_datetime, lower_value,
             upper_value, created_at))

    results = {
        'prediction_start_time': window_end,
        'prediction_hours': fcst_hours,
        'predictions': yhat_24,
        'prediction_upper_ci': upper_ci,
        'prediction_lower_ci': lower_ci
    }

    formated_results = {'predictions': result_modified}
    datamanagement.save_predictions(model_predictions)
    _logger.info(f'Predictions: {results}')
    return formated_results