def __get_dataframes_by_dict(self):
    """Fetch one datapoints dataframe per dict-style data spec.

    Iterates ``self.ts_data_specs`` (each entry a dict keyed by the
    ``constants`` names), fetches a frame for each spec and applies its
    missing-data strategy.

    Returns:
        dict: mapping of spec label (``"default"`` when absent) to dataframe.

    Raises:
        InputError: if two data specs carry the same label — same guard as
            ``__get_dataframes_by_dto``; previously a duplicate label would
            silently overwrite an earlier frame.
    """
    dataframes = {}
    for data_spec in self.ts_data_specs:
        # Required keys: raise KeyError if missing.
        ts = data_spec[constants.TIMESERIES]
        aggregates = data_spec[constants.AGGREGATES]
        granularity = data_spec[constants.GRANULARITY]
        # Optional keys fall back to None (label falls back to "default").
        start = data_spec.get(constants.START)
        end = data_spec.get(constants.END)
        missing_data_strategy = data_spec.get(constants.MISSING_DATA_STRATEGY)
        label = data_spec.get(constants.LABEL, "default")
        df = timeseries.get_datapoints_frame(
            ts,
            aggregates,
            granularity,
            start,
            end,
            api_key=self.api_key,
            project=self.project,
            cookies=self.cookies,
            processes=self.num_of_processes,
        )
        df = self.__apply_missing_data_strategies(df, ts, missing_data_strategy)
        # Consistency with __get_dataframes_by_dto: duplicate labels are a
        # user error, not a silent overwrite.
        if dataframes.get(label) is not None:
            raise InputError("Unique labels for each dataspec must be used")
        dataframes[label] = df
    return dataframes
def test_get_dps_frame_with_limit(self):
    """A ``limit=1`` request must yield exactly one row in the frame."""
    frame = timeseries.get_datapoints_frame(
        time_series=["constant"],
        aggregates=["avg"],
        granularity="1m",
        start=0,
        limit=1,
    )
    assert len(frame.index) == 1
def get_datapoints_frame_response_obj(self, request):
    """Fixture: yield a datapoints frame for the parametrized window.

    ``request.param`` supplies the ``start``/``end`` boundaries.
    """
    window = request.param
    frame = timeseries.get_datapoints_frame(
        time_series=["constant"],
        start=window["start"],
        end=window["end"],
        aggregates=["avg"],
        granularity="1m",
    )
    yield frame
def test_get_dps_frame_with_limit_with_config_values_from_argument(
        self, unset_config_variables):
    """With global config unset, explicit api_key/project plus ``limit=1``
    must still return a single-row frame."""
    api_key = unset_config_variables[0]
    project = unset_config_variables[1]
    frame = timeseries.get_datapoints_frame(
        time_series=["constant"],
        aggregates=["avg"],
        granularity="1m",
        start=0,
        limit=1,
        api_key=api_key,
        project=project,
    )
    assert len(frame.index) == 1
def test_get_dps_frame_with_config_values_from_argument(
        self, unset_config_variables):
    """With global config unset, explicit api_key/project arguments must be
    honored and a DataFrame returned."""
    api_key = unset_config_variables[0]
    project = unset_config_variables[1]
    result = timeseries.get_datapoints_frame(
        time_series=["constant"],
        start=1522188000000,
        end=1522620000000,
        aggregates=["avg"],
        granularity="1m",
        api_key=api_key,
        project=project,
    )
    assert isinstance(result, pd.DataFrame)
def __get_dataframes_by_dto(self):
    """Fetch one datapoints dataframe per TimeSeriesDataSpec DTO.

    Each spec's time series may be given as plain dicts (passed through
    unchanged) or as ``TimeSeries`` objects (converted to the dict form the
    API expects).

    Returns:
        dict: mapping of spec label to dataframe.

    Raises:
        InputError: if a time series entry is neither a dict nor a
            ``TimeSeries``, or if two specs share the same label.
    """
    frames = {}
    for spec in self.ts_data_specs:
        normalized = []
        for series in spec.time_series:
            if isinstance(series, dict):
                normalized.append(series)
            elif isinstance(series, TimeSeries):
                # Flatten the DTO into the dict shape the API expects.
                normalized.append({
                    "name": series.name,
                    "aggregates": series.aggregates,
                    "missingDataStrategy": series.missing_data_strategy,
                })
            else:
                raise InputError(
                    "time_series parameter must be a dict or TimeSeries object"
                )
        frame = timeseries.get_datapoints_frame(
            normalized,
            spec.aggregates,
            spec.granularity,
            spec.start,
            spec.end,
            api_key=self.api_key,
            project=self.project,
            cookies=self.cookies,
            processes=self.num_of_processes,
        )
        frame = self.__apply_missing_data_strategies(
            frame, normalized, spec.missing_data_strategy)
        # Duplicate labels would silently clobber an earlier frame.
        if frames.get(spec.label) is not None:
            raise InputError("Unique labels for each dataspec must be used")
        frames[spec.label] = frame
    return frames
def main():
    """Fetch compressor sensor data, build train/test/validation splits and
    run the LSTM model on the discharge-temperature target.

    NOTE(review): exploratory script — the commented-out windows below were
    earlier experiments and are kept for reference.
    """
    #Plot 3h plots
    # start = datetime.datetime(2018,9,1, second=7)
    # end = datetime.datetime(2018,9,1, hour=3, second=7)

    #Plot correlation
    # start = datetime.datetime(2018,1,2)
    # end = datetime.datetime(2018,1,3)

    #Plot test stationary
    # start = datetime.datetime(2017,11,1)
    # end = datetime.datetime(2018,11,1)
    # granularity = '24h'

    #Data samples
    # Training window: ~2 days of 1-second averages.
    start = datetime.datetime(2018, 1, 5, hour=23, minute=59, second=23)
    end = datetime.datetime(2018, 1, 7, second=0)
    # end = datetime.datetime(2018,1,2, hour=12)
    granularity = '1s'
    aggregates = [
        'avg',
        # 'min',
        # 'max'
    ]
    tags = [
        constants.COMPRESSOR_SUCTION_PRESSURE,
        constants.COMPRESSOR_SUCTION_TEMPERATURE,
        constants.COMPRESSOR_GAS_INFLOW,
        constants.COMPRESSOR_DISCHARGE_TEMPERATURE,
        constants.COMPRESSOR_DISCHARGE_PRESSURE,
        constants.COMPRESSOR_DISCHARGE_MASS_FLOW,
        constants.COMPRESSOR_DISCHARGE_VOLUME_FLOW,
        constants.ANTI_SURGE_VALVE_POSITION,
        constants.SUCTION_THROTTLE_VALVE_POSITION,
        constants.COMPRESSOR_SHAFT_POWER,
    ]
    test_size = 30

    # Training data: index by timestamp, drop the raw timestamp column and
    # rename API tag columns to human-readable names.
    data = get_datapoints_frame(
        time_series=tags,
        start=start,
        end=end,
        granularity=granularity,
        aggregates=aggregates)
    data.set_index(pd.to_datetime(data['timestamp'], unit='ms'), inplace=True)
    dates = pd.to_datetime(data['timestamp'], unit='ms')
    # plotting.plot_input_control(data)
    # plotting.plot_output(data)
    data = data.drop(data.columns[0], axis=1)
    data = data.rename(index=str, columns=control_tags)
    data = data.rename(index=str, columns=input_tags)
    data = data.rename(index=str, columns=output_tags)

    #Test data
    start_minute = 59
    extra_seconds = (60 - start_minute) * 60  #Adding one hour for LSTM
    start = datetime.datetime(
        2018, 10, 2, hour=22, minute=start_minute, second=1)
    # start = datetime.datetime(2018,10, 2, hour=23, minute=start_minute, second=1)
    # end = datetime.datetime(2018, 10, 3, hour=1, second=1)
    end = datetime.datetime(2018, 10, 4, second=1)
    data_test = get_datapoints_frame(
        time_series=tags,
        start=start,
        end=end,
        granularity=granularity,
        aggregates=aggregates)
    # Same cleanup as the training frame.
    data_test.set_index(
        pd.to_datetime(data_test['timestamp'], unit='ms'), inplace=True)
    dates_test = pd.to_datetime(data_test['timestamp'], unit='ms')
    # plotting.plot_input_control(data_test)
    data_test = data_test.drop(data_test.columns[0], axis=1)
    data_test = data_test.rename(index=str, columns=control_tags)
    data_test = data_test.rename(index=str, columns=input_tags)
    data_test = data_test.rename(index=str, columns=output_tags)
    # scatter_matrix(data)

    #Val data
    start = datetime.datetime(
        2018,
        5,
        11,
    )
    end = datetime.datetime(
        2018,
        5,
        11,
        hour=2,
    )
    # end = datetime.datetime(2018, 10, 4, second=1)
    data_val = get_datapoints_frame(
        time_series=tags,
        start=start,
        end=end,
        granularity=granularity,
        aggregates=aggregates)
    # Same cleanup as the training frame.
    data_val.set_index(
        pd.to_datetime(data_val['timestamp'], unit='ms'), inplace=True)
    dates_val = pd.to_datetime(data_val['timestamp'], unit='ms')
    # plotting.plot_input_control(data_val)
    data_val = data_val.drop(data_val.columns[0], axis=1)
    data_val = data_val.rename(index=str, columns=control_tags)
    data_val = data_val.rename(index=str, columns=input_tags)
    data_val = data_val.rename(index=str, columns=output_tags)

    # col = 'Discharge volume flow'
    # plot_acf(data[col].interpolate(),lags=60, title=col+' ACF')
    # plot_pacf(data[col].interpolate(),lags=60, title=col + ' PACF')
    # test_stationarity(data[constants.COMPRESSOR_DISCHARGE_VOLUME_FLOW + '|average'].interpolate(), 'Output Volume Flow')

    # Feature columns (renamed above) and the prediction target.
    x_values = [
        'Suction temperature', 'Suction pressure',
        'Gas inflow from separators', 'Anti-surge valve position',
        'Suction throttle valve position', 'Shaft power'
    ]
    y_value = 'Discharge temperature'
    X = data[x_values]
    y = data[y_value]
    X_test = data_test[x_values]
    y_test = data_test[y_value]
    X_val = data_val[x_values]
    y_val = data_val[y_value]

    # Add a lagged copy of the target; lag=0 keeps the frames unchanged
    # aside from whatever add_lagged_var does internally — the date series
    # are trimmed by the same lag to stay aligned.
    lag = 0
    X, y = add_lagged_var(X, y, y_value, lag, dates)
    dates = dates[lag:]
    X_test, y_test = add_lagged_var(X_test, y_test, y_value, lag, dates_test)
    dates_test = dates_test[lag:]
    X_val, y_val = add_lagged_var(X_val, y_val, y_value, lag, dates_val)
    dates_val = dates_val[lag:]

    # Scale/clean all three splits in one call; the returned NaN counts are
    # used below to keep the date indices aligned with the trimmed frames.
    X, y, X_test, y_test, X_val, y_val, remaining_nan, remaining_nan_test, scaler = preprocess_data(
        X, y, X_test, y_test, X_val, y_val)
    dates = dates[remaining_nan:]
    # Drop the warm-up hour (extra_seconds) that was added for the LSTM.
    dates_test = dates_test[extra_seconds:]
    X_test = X_test.iloc[extra_seconds - remaining_nan_test:]
    y_test = y_test.iloc[extra_seconds - remaining_nan_test:]

    # 1 hour test data
    # hour = 3600*2
    # X_test = X_test[:hour]
    # y_test = y_test[:hour]
    # dates_test = dates_test[:hour]

    # lr.run_linear_regression(X, y, X_test, y_test, dates, dates_test,x_values, y_value, lag, scaler, actual=False)
    # knn.run_knn(X, y, X_test, y_test, X_val, y_val, dates_test, x_values, y_value, lag, scaler, actual=True)
    loss = 'mae'
    # Checkpoint path is derived from target name, loss and lag,
    # e.g. models/Discharge_temperature_mae.h5
    old_model = 'models/' + '_'.join(y_value.split(' ')) + '_' + loss + (
        '' if lag == 0 else str(lag)) + '.h5'
    # old_model = None
    lstm.run_lstm(
        X,
        y,
        X_test,
        y_test,
        X_val,
        y_val,
        dates_test,
        x_values,
        y_value,
        lag,
        scaler,
        old_model,
        save=False,
        loss=loss)

    # lstm_act_temp, lstm_pred_temp = lstm.predict(data[
    #     [
    #        constants.COMPRESSOR_SUCTION_PRESSURE + '|average',
    #        constants.COMPRESSOR_SUCTION_TEMPERATURE + '|average',
    #        constants.COMPRESSOR_GAS_INFLOW + '|average'
    #     ]
    # ].interpolate(),
    # data[constants.COMPRESSOR_DISCHARGE_TEMPERATURE + '|average'].interpolate(),
    # test_size=test_size)
    #
    #
    # plotting.plt_act_pred(pd.DataFrame({'Actual': lstm_act_temp}), pd.DataFrame({'Predicted': lstm_pred_temp}), dates, 'Temperature')
    plt.legend()
    plt.show()
def main():
    """Exploratory walkthrough of the available pressure data: print the
    covered time range, then plot input vs output pressure at hourly and
    1-second resolution, including their difference.

    NOTE(review): side-effect-only script (prints and matplotlib windows);
    assumes the Cognite API credentials are configured globally.
    """
    # Let us first figure out what data is available
    start = datetime.datetime(1970, 1, 1)
    end = '1w-ago'
    granularity = '1d'
    aggregates = ['avg']
    tags = list(input_tags.keys())
    data = get_datapoints_frame(
        time_series=tags,
        start=start,
        end=end,
        granularity=granularity,
        aggregates=aggregates)
    # Timestamps come back as epoch milliseconds.
    T = pd.to_datetime(data.timestamp, unit='ms')
    print(T.iloc[0], T.iloc[-1])

    # Next, let us see the input pressure vs output pressure over the last year
    # on hourly aggregates
    start = '365d-ago'
    end = '1w-ago'
    granularity = '1h'
    aggregates = ['avg', 'min', 'max']
    tags = ['VAL_23-PT-92532:X.Value', 'VAL_23-PT-92539:X.Value']
    data = get_datapoints_frame(
        time_series=tags,
        start=start,
        end=end,
        granularity=granularity,
        aggregates=aggregates)
    fig, ax = plt.subplots(figsize=(8, 5))
    T = pd.to_datetime(data.timestamp, unit='ms')
    # One line per tag/aggregate column (timestamp excluded).
    for var in data.drop(['timestamp'], axis=1).columns:
        ax.plot(T, data[var], label=var)
    plt.legend()
    plt.show()

    # Let us look at the min out - max inn
    # Aggregate columns are suffixed '|min' / '|max' by the API.
    data['minmax'] = data['VAL_23-PT-92539:X.Value|min'] - data[
        'VAL_23-PT-92532:X.Value|max']
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(T, data['minmax'])
    plt.legend()
    plt.show()

    # Let us look at the difference at 1s resolution
    start = '2w-ago'
    end = '1w-ago'
    granularity = '1s'
    aggregates = ['avg']
    tags = ['VAL_23-PT-92532:X.Value', 'VAL_23-PT-92539:X.Value']
    data = get_datapoints_frame(
        time_series=tags,
        start=start,
        end=end,
        granularity=granularity,
        aggregates=aggregates)
    T = pd.to_datetime(data.timestamp, unit='ms')
    # Note: the 'avg' aggregate is labeled '|average' in the frame columns.
    data['minmax'] = data['VAL_23-PT-92539:X.Value|average'] - data[
        'VAL_23-PT-92532:X.Value|average']
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(T, data['minmax'])
    plt.show()