예제 #1
0
    def __get_dataframes_by_dict(self):
        """Fetch one dataframe per dict-style data spec, keyed by label.

        Every entry of ``self.ts_data_specs`` is read as a mapping. The
        ``constants.TIMESERIES``, ``constants.AGGREGATES`` and
        ``constants.GRANULARITY`` entries are looked up with ``[]`` and are
        therefore required. ``constants.START``, ``constants.END`` and
        ``constants.MISSING_DATA_STRATEGY`` fall back to ``None`` and
        ``constants.LABEL`` falls back to ``"default"``.

        Returns:
            dict: maps each label to the frame returned by
            ``timeseries.get_datapoints_frame`` after it has been passed
            through ``__apply_missing_data_strategies``.

        Raises:
            KeyError: if a required entry is missing (assuming the specs are
                plain dicts).
            InputError: if two specs resolve to the same label. Without this
                check the later frame would silently replace the earlier
                one, which is easy to hit because unlabeled specs all share
                the ``"default"`` label.
        """
        dataframes = {}
        for data_spec in self.ts_data_specs:
            ts = data_spec[constants.TIMESERIES]
            aggregates = data_spec[constants.AGGREGATES]
            granularity = data_spec[constants.GRANULARITY]
            start = data_spec.get(constants.START)
            end = data_spec.get(constants.END)
            missing_data_strategy = data_spec.get(
                constants.MISSING_DATA_STRATEGY)
            label = data_spec.get(constants.LABEL, "default")

            # Reject duplicate labels before paying for the network fetch.
            if label in dataframes:
                raise InputError(
                    "Unique labels for each dataspec must be used")

            df = timeseries.get_datapoints_frame(
                ts,
                aggregates,
                granularity,
                start,
                end,
                api_key=self.api_key,
                project=self.project,
                cookies=self.cookies,
                processes=self.num_of_processes,
            )
            dataframes[label] = self.__apply_missing_data_strategies(
                df, ts, missing_data_strategy)

        return dataframes
예제 #2
0
 def test_get_dps_frame_with_limit(self):
     """A request with ``limit=1`` must produce a frame with one row."""
     query = {
         "time_series": ["constant"],
         "aggregates": ["avg"],
         "granularity": "1m",
         "start": 0,
         "limit": 1,
     }
     frame = timeseries.get_datapoints_frame(**query)
     row_count = frame.shape[0]
     assert row_count == 1
예제 #3
0
 def get_datapoints_frame_response_obj(self, request):
     """Yield the datapoints frame for the window in ``request.param``.

     ``request.param`` is indexed with the keys ``"start"`` and ``"end"``.
     """
     window = request.param
     frame = timeseries.get_datapoints_frame(
         time_series=["constant"],
         start=window["start"],
         end=window["end"],
         aggregates=["avg"],
         granularity="1m",
     )
     yield frame
예제 #4
0
 def test_get_dps_frame_with_limit_with_config_values_from_argument(
         self, unset_config_variables):
     """``limit=1`` yields one row when credentials are passed explicitly.

     The first two items of ``unset_config_variables`` are forwarded as
     ``api_key`` and ``project`` respectively.
     """
     query = {
         "time_series": ["constant"],
         "aggregates": ["avg"],
         "granularity": "1m",
         "start": 0,
         "limit": 1,
         "api_key": unset_config_variables[0],
         "project": unset_config_variables[1],
     }
     frame = timeseries.get_datapoints_frame(**query)
     row_count = frame.shape[0]
     assert row_count == 1
예제 #5
0
 def test_get_dps_frame_with_config_values_from_argument(
         self, unset_config_variables):
     """The call returns a ``pd.DataFrame`` with explicit credentials.

     The first two items of ``unset_config_variables`` are forwarded as
     ``api_key`` and ``project`` respectively.
     """
     query = {
         "time_series": ["constant"],
         "start": 1522188000000,
         "end": 1522620000000,
         "aggregates": ["avg"],
         "granularity": "1m",
         "api_key": unset_config_variables[0],
         "project": unset_config_variables[1],
     }
     result = timeseries.get_datapoints_frame(**query)
     assert isinstance(result, pd.DataFrame)
예제 #6
0
    def __get_dataframes_by_dto(self):
        """Fetch one dataframe per data-spec object, keyed by its label.

        Each spec in ``self.ts_data_specs`` exposes ``time_series``,
        ``aggregates``, ``granularity``, ``start``, ``end``,
        ``missing_data_strategy`` and ``label``. Entries of ``time_series``
        may be plain dicts (forwarded untouched) or ``TimeSeries`` objects
        (converted to a dict with the keys ``name``, ``aggregates`` and
        ``missingDataStrategy``).

        Returns:
            dict: maps each spec label to its frame after
            ``__apply_missing_data_strategies`` has been applied.

        Raises:
            InputError: if a ``time_series`` entry is neither a dict nor a
                ``TimeSeries``, or if a label already has a frame stored.
        """
        frames_by_label = {}
        for spec in self.ts_data_specs:
            series_payload = []
            for series in spec.time_series:
                if isinstance(series, dict):
                    series_payload.append(series)
                    continue
                if not isinstance(series, TimeSeries):
                    raise InputError(
                        "time_series parameter must be a dict or TimeSeries object"
                    )
                series_payload.append({
                    "name": series.name,
                    "aggregates": series.aggregates,
                    "missingDataStrategy": series.missing_data_strategy,
                })

            fetched = timeseries.get_datapoints_frame(
                series_payload,
                spec.aggregates,
                spec.granularity,
                spec.start,
                spec.end,
                api_key=self.api_key,
                project=self.project,
                cookies=self.cookies,
                processes=self.num_of_processes,
            )
            cleaned = self.__apply_missing_data_strategies(
                fetched, series_payload, spec.missing_data_strategy)

            label = spec.label
            # A label counts as taken only when a non-None frame is stored.
            already_stored = frames_by_label.get(label) is not None
            if already_stored:
                raise InputError(
                    "Unique labels for each dataspec must be used")
            frames_by_label[label] = cleaned
        return frames_by_label
예제 #7
0
def _load_labelled_frame(tags, start, end, granularity, aggregates):
    """Fetch datapoints for *tags* and return ``(frame, dates)``.

    The frame is indexed by the millisecond ``'timestamp'`` column converted
    to datetimes, has its first column dropped, and has its columns renamed
    through ``control_tags``, ``input_tags`` and ``output_tags`` in that
    order (each rename also converts the index labels to ``str``).
    ``dates`` is the converted timestamp series taken after re-indexing.
    """
    frame = get_datapoints_frame(time_series=tags,
                                 start=start,
                                 end=end,
                                 granularity=granularity,
                                 aggregates=aggregates)
    frame.set_index(pd.to_datetime(frame['timestamp'], unit='ms'),
                    inplace=True)
    # Converted a second time on purpose: taken after set_index, this series
    # carries the new datetime index instead of the frame's original index.
    dates = pd.to_datetime(frame['timestamp'], unit='ms')
    # Presumably the first column is 'timestamp' -- TODO confirm against
    # the column order returned by get_datapoints_frame.
    frame = frame.drop(frame.columns[0], axis=1)
    for tag_names in (control_tags, input_tags, output_tags):
        frame = frame.rename(index=str, columns=tag_names)
    return frame, dates


def main():
    """Run the LSTM experiment for 'Discharge temperature'.

    Loads a training, a test and a validation window of compressor tags at
    '1s' granularity with the 'avg' aggregate, selects the feature and
    target columns, passes them through ``add_lagged_var`` and
    ``preprocess_data``, calls ``lstm.run_lstm`` with ``save=False`` and
    finally shows the matplotlib figure.
    """
    # Alternative training windows used for other experiments:
    #   3h plots:        2018-09-01 00:00:07 -> 2018-09-01 03:00:07
    #   correlation:     2018-01-02 -> 2018-01-03
    #   test stationary: 2017-11-01 -> 2018-11-01 with granularity '24h'
    granularity = '1s'
    aggregates = [
        'avg',
        # 'min',
        # 'max'
    ]
    tags = [
        constants.COMPRESSOR_SUCTION_PRESSURE,
        constants.COMPRESSOR_SUCTION_TEMPERATURE,
        constants.COMPRESSOR_GAS_INFLOW,
        constants.COMPRESSOR_DISCHARGE_TEMPERATURE,
        constants.COMPRESSOR_DISCHARGE_PRESSURE,
        constants.COMPRESSOR_DISCHARGE_MASS_FLOW,
        constants.COMPRESSOR_DISCHARGE_VOLUME_FLOW,
        constants.ANTI_SURGE_VALVE_POSITION,
        constants.SUCTION_THROTTLE_VALVE_POSITION,
        constants.COMPRESSOR_SHAFT_POWER,
    ]

    # Training data
    data, dates = _load_labelled_frame(
        tags,
        datetime.datetime(2018, 1, 5, hour=23, minute=59, second=23),
        datetime.datetime(2018, 1, 7, second=0),
        granularity,
        aggregates,
    )

    # Test data. The window starts at hour 22 rather than 23 (original note:
    # "adding one hour for LSTM"). extra_seconds is the number of seconds
    # from start_minute to the next full hour: (60 - 59) * 60 == 60.
    start_minute = 59
    extra_seconds = (60 - start_minute) * 60
    data_test, dates_test = _load_labelled_frame(
        tags,
        datetime.datetime(2018,
                          10,
                          2,
                          hour=22,
                          minute=start_minute,
                          second=1),
        datetime.datetime(2018, 10, 4, second=1),
        granularity,
        aggregates,
    )

    # Validation data
    data_val, dates_val = _load_labelled_frame(
        tags,
        datetime.datetime(2018, 5, 11),
        datetime.datetime(2018, 5, 11, hour=2),
        granularity,
        aggregates,
    )

    x_values = [
        'Suction temperature', 'Suction pressure',
        'Gas inflow from separators', 'Anti-surge valve position',
        'Suction throttle valve position', 'Shaft power'
    ]

    y_value = 'Discharge temperature'

    X = data[x_values]
    y = data[y_value]
    X_test = data_test[x_values]
    y_test = data_test[y_value]
    X_val = data_val[x_values]
    y_val = data_val[y_value]

    # Each date series is cut by the same number of leading entries as the
    # lag passed to add_lagged_var (no-op while lag == 0).
    lag = 0
    X, y = add_lagged_var(X, y, y_value, lag, dates)
    dates = dates[lag:]
    X_test, y_test = add_lagged_var(X_test, y_test, y_value, lag, dates_test)
    dates_test = dates_test[lag:]
    X_val, y_val = add_lagged_var(X_val, y_val, y_value, lag, dates_val)
    dates_val = dates_val[lag:]

    (X, y, X_test, y_test, X_val, y_val, remaining_nan, remaining_nan_test,
     scaler) = preprocess_data(X, y, X_test, y_test, X_val, y_val)
    dates = dates[remaining_nan:]
    dates_test = dates_test[extra_seconds:]

    # Presumably preprocess_data has already removed remaining_nan_test
    # leading rows from the test set, so only the rest of extra_seconds is
    # trimmed here -- TODO confirm against preprocess_data.
    X_test = X_test.iloc[extra_seconds - remaining_nan_test:]
    y_test = y_test.iloc[extra_seconds - remaining_nan_test:]

    # Alternative models:
    # lr.run_linear_regression(X, y, X_test, y_test, dates, dates_test,x_values, y_value, lag, scaler, actual=False)
    # knn.run_knn(X, y, X_test, y_test, X_val, y_val, dates_test, x_values, y_value, lag, scaler, actual=True)

    loss = 'mae'
    # Path of a previously stored model; set to None to pass no old model.
    old_model = 'models/' + '_'.join(y_value.split(' ')) + '_' + loss + (
        '' if lag == 0 else str(lag)) + '.h5'
    lstm.run_lstm(X,
                  y,
                  X_test,
                  y_test,
                  X_val,
                  y_val,
                  dates_test,
                  x_values,
                  y_value,
                  lag,
                  scaler,
                  old_model,
                  save=False,
                  loss=loss)

    plt.legend()
    plt.show()
예제 #8
0
def main():
    """Explore two pressure time series in three steps.

    1. Fetch daily averages for every key of ``input_tags`` from 1970-01-01
       to one week ago and print the first and last timestamp returned.
    2. Plot hourly avg/min/max of the two pressure tags over the last year,
       then the difference between the minimum of tag 92539 and the maximum
       of tag 92532.
    3. Plot the difference of the 1-second averages over the week that
       ended one week ago.

    Each figure is displayed with ``plt.show()``.
    """
    # Let us first figure out what data is available
    start = datetime.datetime(1970, 1, 1)
    end = '1w-ago'
    granularity = '1d'
    aggregates = ['avg']
    tags = list(input_tags.keys())
    data = get_datapoints_frame(time_series=tags,
                                start=start,
                                end=end,
                                granularity=granularity,
                                aggregates=aggregates)

    T = pd.to_datetime(data.timestamp, unit='ms')
    print(T.iloc[0], T.iloc[-1])

    # Next, let us see the input pressure vs output pressure over the last year
    # on hourly aggregates
    start = '365d-ago'
    end = '1w-ago'
    granularity = '1h'
    aggregates = ['avg', 'min', 'max']
    tags = ['VAL_23-PT-92532:X.Value', 'VAL_23-PT-92539:X.Value']
    data = get_datapoints_frame(time_series=tags,
                                start=start,
                                end=end,
                                granularity=granularity,
                                aggregates=aggregates)

    _, ax = plt.subplots(figsize=(8, 5))
    T = pd.to_datetime(data.timestamp, unit='ms')
    for var in data.drop(['timestamp'], axis=1).columns:
        ax.plot(T, data[var], label=var)

    plt.legend()
    plt.show()

    # Let us look at the min out - max in
    data['minmax'] = data['VAL_23-PT-92539:X.Value|min'] - data[
        'VAL_23-PT-92532:X.Value|max']
    _, ax = plt.subplots(figsize=(8, 5))
    # The line needs a label: plt.legend() on an axes without labelled
    # artists draws an empty legend and emits a warning.
    ax.plot(T, data['minmax'], label='min out - max in')

    plt.legend()
    plt.show()

    # Let us look at the difference at 1s resolution
    start = '2w-ago'
    end = '1w-ago'
    granularity = '1s'
    aggregates = ['avg']
    tags = ['VAL_23-PT-92532:X.Value', 'VAL_23-PT-92539:X.Value']
    data = get_datapoints_frame(time_series=tags,
                                start=start,
                                end=end,
                                granularity=granularity,
                                aggregates=aggregates)
    T = pd.to_datetime(data.timestamp, unit='ms')
    data['minmax'] = data['VAL_23-PT-92539:X.Value|average'] - data[
        'VAL_23-PT-92532:X.Value|average']
    _, ax = plt.subplots(figsize=(8, 5))
    ax.plot(T, data['minmax'])

    plt.show()