Esempio n. 1
0
def create_time_series(resampling_methods, chunk_ids, chunk_type, original_chunks, parameter,
                       window_idx, configs, mean=0, std=1):
    """Build one filled (optionally scaled) series per chunk and pickle them per resampling method.

    For each entry of ``resampling_methods`` a dict ``{chunk_id: series}`` is built from the rows of
    ``original_chunks`` belonging to each chunk and written with ``write_pickle_file``. When min-max
    scaling is configured, the fitted scalers of prediction chunks are written to a second pickle.

    Args:
        resampling_methods: Iterable of resampling names; each name selects the column
            ``VITAL_PARAMTER_VALUE_<name>_RESAMPLING`` and is used (capitalized) in the file names.
        chunk_ids: Iterable of values matched against the ``CHUNK_ID_FILLED_TH`` column.
        chunk_type: Label used in the output file name; scalers are only kept when it is ``'pred'``.
        original_chunks: DataFrame holding at least the columns ``CHUNK_ID_FILLED_TH``, ``CHARTTIME``
            and the value column of every resampling method.
        parameter: Label used in the output file names.
        window_idx: Window index used in the output file names.
        configs: Object providing ``scaling_method`` and ``with_exogenous_input``; it is also handed
            to ``get_script_path`` to resolve the output directory.
        mean: Mean passed to ``apply_standard_scaling`` (only used for ``'standard'`` scaling).
        std: Standard deviation passed to ``apply_standard_scaling`` (only used for ``'standard'``).

    Returns:
        None. The results are written to disk.
    """
    # Some series have missing measurements, which would lead to a ValueError in prediction,
    # so every series is passed through the filler.
    filler = MissingValuesFiller()

    for resampling in resampling_methods:
        series_per_resampling = dict()
        pred_scalers = dict()
        raw_col = f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'

        for chunk_id in chunk_ids:
            current_chunk = original_chunks[original_chunks['CHUNK_ID_FILLED_TH'] == chunk_id]
            value_col = raw_col

            if configs.scaling_method == 'standard':
                # The row selection above is a slice of the caller's frame. Copy it before adding a
                # column so the write target is unambiguous and pandas does not raise a
                # SettingWithCopyWarning.
                current_chunk = current_chunk.copy()
                value_col = f'SCALED_{resampling}'
                current_chunk[value_col] = apply_standard_scaling(current_chunk[raw_col], mean, std)

            # All scaling modes share the same construction; only the value column differs.
            series = filler.transform(TimeSeries.from_dataframe(
                df=current_chunk,
                time_col='CHARTTIME',
                value_cols=[value_col],
                freq='H'))

            if configs.scaling_method == 'min-max':
                # Darts uses MinMaxScaler by default; a fresh scaler is fitted per chunk.
                current_scaler = Scaler()
                series = current_scaler.fit_transform(series)

                # Keep the fitted scaler for prediction chunks, except for the MEDIAN series when
                # exogenous input is enabled.
                if chunk_type == 'pred' and (not configs.with_exogenous_input or resampling != 'MEDIAN'):
                    pred_scalers[chunk_id] = current_scaler

            series_per_resampling[chunk_id] = series

        # Save series dict
        path = get_script_path(configs)
        write_pickle_file(f'{path}/time_series/time_series_{parameter}_win{window_idx}_{chunk_type}_'
                          f'{resampling.capitalize()}.pickle', series_per_resampling)

        # Save scaler dict if it was filled
        if pred_scalers:
            write_pickle_file(f'{path}/scalers/scalers_{parameter}_win{window_idx}_{resampling.capitalize()}.pickle',
                              pred_scalers)
Esempio n. 2
0
    'N_HIGH_ALARMS', 'N_LOW_ALARMS', 'N_CHUNKS', 'N_ITERATIONS'
])

# NOTE: fixed on purpose -- for MAX and MIN see the other scripts whose names end with "covariates".
endogenous_input = 'Median'
exogenous_input = np.nan

# Reject unsupported styles first, then derive the window count from the accepted value.
if style not in ('all', '20_percent'):
    raise ValueError('The style has to be "all" or "20_percent".')
n_windows = 5 if style == 'all' else 1

# NOTE: temporary workaround -- drop the filler once the resampling script is fixed.
filler = MissingValuesFiller()

for model_type in model_types:
    print(
        f'\n##############################\nCurrent Model Type: {model_type}\n##############################\n',
        file=sys.stderr)

    # Create sub folder for each model type
    if not os.path.isdir(
            f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}'):
        os.mkdir(f'./data/{approach}/{n_chunks}_chunks/{style}/{model_type}')

    # Create model per model type
    model = RNNModel(
        model=model_type,
        input_chunk_length=input_length,
    def test_fill_lin_series_with_auto_value(self):
        """A default-constructed filler applied to the holed linear fixture yields ``lin_series``."""
        filled = MissingValuesFiller().transform(self.lin_series_with_holes)

        self.assertEqual(self.lin_series, filled)
    def test_fill_const_series_with_const_value(self):
        """A filler with ``fill=2.0`` applied to the holed constant fixture yields ``const_series``."""
        filled = MissingValuesFiller(fill=2.0).transform(self.const_series_with_holes)

        self.assertEqual(self.const_series, filled)
# Select the time series variable together with its timestamp column.
ts_var = 'CPU_Used'
time_df = df.filter(['date', ts_var])

# Convert the variable from object dtype to numeric dtype; unparseable entries become NaN.
time_df[ts_var] = pd.to_numeric(time_df[ts_var], errors='coerce')

# Order by timestamp and drop every row whose timestamp occurs more than once
# (keep=False discards all occurrences, not just the repeats).
time_df = time_df.sort_values("date").drop_duplicates(subset="date", keep=False)

# Generate the time series using darts.
series = TimeSeries.from_dataframe(time_df, 'date', ts_var, freq='T')

# Treat missing values (including the NaNs produced by the numeric coercion above).
filler = MissingValuesFiller()
series = filler.transform(series)

# Training and validation split.
train, val = series.split_after(pd.Timestamp('2019-10-23 19:41:50'))

# FFT model
model = FFT(required_matches=set(), nr_freqs_to_keep=None)
model.fit(train)
pred_val = model.predict(len(val))

# Evaluation: plot the actual series against the forecast and report the error.
series.plot(label='actual')
pred_val.plot(label='forecast', lw=3)
plt.legend()
# darts' mape takes the actual series first and the prediction second. MAPE divides by the
# actual values, so swapping the arguments reports a different (wrong) number.
print("MAPE:", mape(val, pred_val))