Example #1
0
def decomposeTimeSeries(D_time,
                        seriesVariable,
                        samplingInterval='week',
                        date_field='TIMESTAMP_IN',
                        decompositionModel='additive'):
    #this function defines a graph decomposing a time series
    #D_time is the reference dataframe
    #date_field is the string with the name of the column containing the datetime series
    #seriesVariable is the string with the name of the column containing the series
    #samplingInterval if week it groups the series for week
    #decompositionModel is the argument of seasonal_decompose (additive or multiplicative)

    # estraggo la serie temporale giornaliera
    timeSeries = pd.DataFrame(D_time[[date_field, seriesVariable]])
    timeSeries_analysis = timeSeries.set_index(date_field).resample('D').sum()
    timeSeries_analysis[date_field] = timeSeries_analysis.index.values

    if samplingInterval == 'month':
        timeSeries_analysis = ts.raggruppaPerMese(timeSeries_analysis,
                                                  date_field, seriesVariable,
                                                  'sum')
        frequency = min(12,
                        len(timeSeries_analysis) -
                        1)  # cerco una frequenza annuale
    elif samplingInterval == 'week':
        timeSeries_analysis = ts.raggruppaPerSettimana(timeSeries_analysis,
                                                       date_field,
                                                       seriesVariable, 'sum')
        frequency = min(4,
                        len(timeSeries_analysis) -
                        1)  # cerco una frequenza mensile
    elif samplingInterval == 'day':
        timeSeries_analysis = timeSeries_analysis[seriesVariable]
        frequency = min(7,
                        len(timeSeries_analysis) -
                        1)  # cerco una frequenza settimanale

    if len(timeSeries_analysis) < 2 * frequency:
        print(
            f"Not enough values to decompose series with sampling interval {samplingInterval}"
        )
        return plt.figure()
    result = seasonal_decompose(timeSeries_analysis,
                                model=decompositionModel,
                                freq=frequency)
    fig = result.plot()
    return fig
Example #2
0
def seasonalityWithfourier(D_time,
                           seriesVariable,
                           samplingInterval='week',
                           date_field='TIMESTAMP_IN',
                           titolo=''):
    #this function decompose the seasonal part of a time series using Fourier transform
    #D_time is the reference dataframe
    #seriesVariable is the string with the name of the column containing the series
    #samplingInterval if week it groups the series for week or gay
    #date_field is the string with the name of the column containing the datetime series
    #titolo is the title of the graph

    # estraggo la serie temporale
    timeSeries = pd.DataFrame(D_time[[date_field, seriesVariable]])
    timeSeries_analysis = timeSeries.set_index(date_field).resample('D').sum()
    timeSeries_analysis[date_field] = timeSeries_analysis.index.values

    if samplingInterval == 'month':
        timeSeries_analysis = ts.raggruppaPerMese(timeSeries_analysis,
                                                  date_field, seriesVariable,
                                                  'sum')
    elif samplingInterval == 'week':
        timeSeries_analysis = ts.raggruppaPerSettimana(timeSeries_analysis,
                                                       date_field,
                                                       seriesVariable, 'sum')
    elif samplingInterval == 'day':
        timeSeries_analysis = timeSeries_analysis[seriesVariable]

    y = np.array(timeSeries_analysis)
    D = ts.fourierAnalysis(y)

    fig = plt.figure()
    plt.stem(1 / D['Frequency_domain_value'], D['Amplitude'])
    plt.title(f"Amplitude spectrum {titolo}")
    plt.xlabel(f"Time domain: {samplingInterval}")
    plt.ylabel('Amplitude')
    return fig
Example #3
0
def predictWithARIMA(D_series,
                     seriesVariable,
                     samplingInterval='week',
                     date_field='TIMESTAMP_IN',
                     titolo='',
                     signifAlpha=0.05,
                     maxValuesSelected=2):

    #this function applies predictions using ARIMA models

    #D_series is the reference dataframe
    #date_field is the string with the name of the column containing the datetime series
    #seriesVariable is the string with the name of the column containing the series
    #samplingInterval if week it groups the series for week
    #signifAlpha is the significance level (0.1 , 0.05, 0.01) to accept or reject the null hypothesis of Dickey fuller
    #maxValuesSelected int defining the number of significant lags to consider in ACF and PACF

    #the function returns
    # fig_CF with the PACF and ACF figure
    #figure_forecast the forecast figure,
    #figure_residuals the residual figure,
    #resultModel the model resulting parameters

    # estraggo la serie temporale
    timeSeries = pd.DataFrame(D_series[[date_field, seriesVariable]])
    timeSeries_analysis = timeSeries.set_index(date_field).resample('D').sum()
    timeSeries_analysis[date_field] = timeSeries_analysis.index.values

    if samplingInterval == 'month':
        timeSeries_analysis = ts.raggruppaPerMese(timeSeries_analysis,
                                                  date_field, seriesVariable,
                                                  'sum')
    elif samplingInterval == 'week':
        timeSeries_analysis = ts.raggruppaPerSettimana(timeSeries_analysis,
                                                       date_field,
                                                       seriesVariable, 'sum')
    elif samplingInterval == 'day':
        timeSeries_analysis = timeSeries_analysis[seriesVariable]

    #transform series to stationarity
    seriesVariable = 'count_TIMESTAMP_IN'
    stationary_series, stationary_model = ts.transformSeriesToStationary(
        timeSeries_analysis, signifAlpha=signifAlpha)

    #aggiungere l'uscita del modello stazionario e il return

    #se sono riuscito a frasformare la serie in stazionaria proseguo
    if len(stationary_series) > 1:

        #detect ACF and PACF
        fig_CF, D_acf_significant, D_pacf_significant = ts.ACF_PACF_plot(
            stationary_series)
        params = ts.returnsignificantLags(D_pacf_significant,
                                          D_acf_significant, maxValuesSelected)

        # Running ARIMA fit, consider that

        figure_forecast, figure_residuals, resultModel = ts.SARIMAXfit(
            stationary_series, params)

        return stationary_model, fig_CF, figure_forecast, figure_residuals, resultModel
    else:  #cannot make the series stationary, cannot use ARIMA
        return [], [], [], [], []
Example #4
0
def predictWithFBPROPHET(D_series,
                         timeVariable,
                         seriesVariable,
                         prediction_results,
                         titolo,
                         samplingInterval='week',
                         predictionsLength=52):
    #D_time is a dataframe containing the timeseries and the values
    #timeVariable is a string with the name of the column of the dataframe containing timestamps
    #seriesVariable is a string with the name of the column of the dataframe containing values
    #predictionsLength is an int with the number of periods to predict
    #prediction_results is the path where to save the output
    #samplingInterval if week it groups the series for week
    # titolo is the title to save the output figure

    # estraggo la serie temporale
    timeSeries = pd.DataFrame(D_series[[timeVariable, seriesVariable]])
    timeSeries_analysis = timeSeries.set_index(timeVariable).resample(
        'D').sum()
    timeSeries_analysis[timeVariable] = timeSeries_analysis.index.values

    if samplingInterval == 'month':
        timeSeries_analysis = ts.raggruppaPerMese(timeSeries_analysis,
                                                  timeVariable, seriesVariable,
                                                  'sum')
    elif samplingInterval == 'week':
        timeSeries_analysis = ts.raggruppaPerSettimana(timeSeries_analysis,
                                                       timeVariable,
                                                       seriesVariable, 'sum')
    elif samplingInterval == 'day':
        timeSeries_analysis = timeSeries_analysis[seriesVariable]

    #prepare input dataframe
    timeSeries_analysis = pd.DataFrame(
        [timeSeries_analysis.index.values, timeSeries_analysis]).transpose()
    timeSeries_analysis.columns = ['ds', 'y']

    m = Prophet()
    m.fit(timeSeries_analysis)

    #make predictions
    future = m.make_future_dataframe(periods=predictionsLength)
    #future.tail()

    forecast = m.predict(future)
    forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

    #evaluate model goodness
    MSE = mean_squared_error(timeSeries_analysis.y,
                             forecast.yhat[0:len(timeSeries_analysis.y)])

    # Output figure in matplotlib
    forecast_fig = m.plot(forecast)
    components_fig = m.plot_components(forecast)

    #Output with plotly
    #py.init_notebook_mode()
    fig = plot_plotly(m, forecast)  # This returns a plotly Figure
    py.iplot(fig)
    py.plot(fig,
            filename=f"{prediction_results}\\prophet_{titolo}.html",
            auto_open=False)
    return m, forecast_fig, components_fig, MSE
Example #5
0
def bookingStatistics(D_mov,
                      capacityField='QUANTITY',
                      timeVariable='TIMESTAMP_IN',
                      samplingInterval=['day', 'week', 'month']):
    #Analisi trend mensili, settimanali, giornalieri e per giorno della settimana
    #timeVariable e' una variabile di raggruppamento base tempo
    #capacityField e' la variabile di capacita' per studiare le coperture

    #creo dizionari di risultati
    imageResults = {}
    dataframeResults = {}
    dataResults_trend = {}
    coverage_stats = {}

    #calcolo le coperture
    accuracy, _ = getCoverageStats(D_mov,
                                   analysisFieldList=timeVariable,
                                   capacityField=capacityField)

    D_OrderTrend = D_mov.groupby([timeVariable]).size().reset_index()
    D_OrderTrend.columns = ['DatePeriod', 'Orders']
    D_OrderTrend = D_OrderTrend.sort_values(['DatePeriod'])
    #D_OrderTrend['DatePeriod']=pd.to_datetime(D_OrderTrend['DatePeriod'])

    for spInterval in samplingInterval:
        if spInterval == 'month':
            timeSeries_analysis = ts.raggruppaPerMese(D_OrderTrend,
                                                      'DatePeriod', 'Orders',
                                                      'sum')

        elif spInterval == 'week':
            timeSeries_analysis = ts.raggruppaPerSettimana(
                D_OrderTrend, 'DatePeriod', 'Orders', 'sum')

        elif spInterval == 'day':
            timeSeries_analysis = D_OrderTrend.set_index('DatePeriod')
            timeSeries_analysis = timeSeries_analysis['Orders']

        #trend giornaliero
        fig1 = plt.figure()
        plt.plot(timeSeries_analysis.index.values,
                 timeSeries_analysis,
                 color='orange')
        plt.title(f"TREND: {timeVariable} per {spInterval}")
        plt.xticks(rotation=30)
        imageResults[f"trend_{spInterval}"] = fig1

        #distribuzione
        fig2 = plt.figure()
        plt.hist(timeSeries_analysis, color='orange')
        plt.title(f"Frequency analysis of {timeVariable} per {spInterval}")
        plt.xlabel(f"{timeVariable}")
        plt.ylabel(f"{spInterval}")
        imageResults[f"pdf_{spInterval}"] = fig2
        #fig1.savefig(dirResults+'\\02-ContainerPDFDaily.png')

        daily_mean = np.mean(timeSeries_analysis)
        daily_std = np.std(timeSeries_analysis)

        #calcolo i valori
        dataResults_trend[f"{timeVariable}_{spInterval}_MEAN"] = daily_mean
        dataResults_trend[f"{timeVariable}_{spInterval}_STD"] = daily_std

        #assegno le coperture
        coverage_stats[f"{timeVariable}_{spInterval}_MEAN"] = accuracy
        coverage_stats[f"{timeVariable}_{spInterval}_STD"] = accuracy

    #salvo dataframe con i risultati dei trend e le coperture
    D_trend_stat = pd.DataFrame([dataResults_trend,
                                 coverage_stats]).transpose()
    D_trend_stat.columns = ['VALUE', 'ACCURACY']
    dataframeResults['trend_df'] = D_trend_stat

    #distribuzione per giorno della settimana
    D_grouped = ts.raggruppaPerGiornoDellaSettimana(D_OrderTrend,
                                                    timeVariable='DatePeriod',
                                                    seriesVariable='Orders')
    D_grouped['accuracy'] = [accuracy for i in range(0, len(D_grouped))]
    dataframeResults['weekday_df'] = D_grouped
    #D_grouped.to_excel(dirResults+'\\02-ContainerWeekday.xlsx')

    fig3 = plt.figure()
    plt.bar(D_grouped.index.values, D_grouped['mean'], color='orange')
    plt.title(f"N.of {timeVariable} per day of the week")
    plt.xlabel('day of the week')
    plt.ylabel('Frequency')
    imageResults[f"pdf_dayOfTheWeek"] = fig3
    #fig1.savefig(dirResults+'\\02-ContainerPerweekDay.png')

    #D_movDaily.to_excel(dirResults+'\\02-ContainerDailyStats.xlsx')
    return imageResults, dataframeResults
Example #6
0
def plotQuantityTrendWeeklyDaily(D_temp,
                                 date_field='TIMESTAMP_IN',
                                 filterVariable=[],
                                 filterValue=[],
                                 quantityVariable='sum_QUANTITY',
                                 countVariable='count_TIMESTAMP_IN',
                                 titolo=''):
    #the function return a figure with two subplots on for quantities the other for lines
    # D_temp is the input dataframe
    #data_fiels is the string with the column name for the date field
    # filterVariable is the string with the column name for filtering the dataframe
    # filterValue is the value to filter the dataframe
    # quantityVariable is the string with the column name for the sum of the quantities
    # countVariable is the string with the column name for the count
    #titolo is the title of the figure

    if len(filterVariable) > 0:
        D_temp = D_temp[D_temp[filterVariable] == filterValue]
    D_temp = D_temp.sort_values(date_field)
    D_temp = D_temp.reset_index(drop=True)
    D_temp = D_temp.dropna(subset=[date_field, quantityVariable])

    fig, axs = plt.subplots(1, 2, figsize=(10, 8))
    fig.suptitle(titolo)

    #QUANTITIES

    # estraggo la serie temporale giornaliera
    timeSeries = pd.DataFrame(D_temp[[date_field, quantityVariable]])
    timeSeries_day = timeSeries.set_index(date_field).resample('D').sum()

    # estraggo la serie temporale settimanale
    timeSeries_week = ts.raggruppaPerSettimana(timeSeries, date_field,
                                               quantityVariable, 'sum')

    # estraggo la serie temporale mensile
    timeSeries_month = ts.raggruppaPerMese(timeSeries, date_field,
                                           quantityVariable, 'sum')

    #plot weekly-daily
    axs[0].plot(timeSeries_day)
    axs[0].plot(timeSeries_week)
    axs[0].plot(timeSeries_month)
    axs[0].set_title('Quantity trend')
    axs[0].legend(
        ['daily time series', 'weekly time series', 'monthly time series'])
    for tick in axs[0].get_xticklabels():
        tick.set_rotation(45)

    #LINES

    # estraggo la serie temporale giornaliera
    timeSeries = pd.DataFrame(D_temp[[date_field, countVariable]])
    timeSeries_day = timeSeries.set_index(date_field).resample('D').sum()

    # estraggo la serie temporale settimanale
    timeSeries_week = ts.raggruppaPerSettimana(timeSeries, date_field,
                                               countVariable, 'sum')

    # estraggo la serie temporale settimanale
    timeSeries_month = ts.raggruppaPerMese(timeSeries, date_field,
                                           countVariable, 'sum')

    #plot weekly-daily
    axs[1].plot(timeSeries_day)
    axs[1].plot(timeSeries_week)
    axs[1].plot(timeSeries_month)
    axs[1].set_title('Lines trend')
    axs[1].legend(
        ['daily time series', 'weekly time series', 'monthly time series'])
    for tick in axs[1].get_xticklabels():
        tick.set_rotation(45)

    #plt.close('all')
    return fig