Exemple #1
0
def get_time_series():
    """ Build train/predict InputData for a time series forecasting task.

    Returns a tuple (train_input, predict_input, test_data) where the
    last 100 points of the synthetic series are held out for validation.
    """
    horizon = 100
    series = generate_synthetic_data(length=1000)

    # Hold out the tail of the series for validation
    train_part, holdout = series[:-horizon], series[-horizon:]

    forecasting_task = Task(TaskTypesEnum.ts_forecasting,
                            TsForecastingParams(forecast_length=horizon))

    train_input = InputData(idx=np.arange(len(train_part)),
                            features=train_part,
                            target=train_part,
                            task=forecasting_task,
                            data_type=DataTypesEnum.ts)

    # Forecast indices continue directly after the training part
    forecast_idx = np.arange(len(train_part), len(train_part) + horizon)
    predict_input = InputData(idx=forecast_idx,
                              features=train_part,
                              target=None,
                              task=forecasting_task,
                              data_type=DataTypesEnum.ts)

    return train_input, predict_input, holdout
Exemple #2
0
def prepare_input_data(forecast_length, horizon):
    """ Create train and predict InputData from a hard-coded series.

    :param forecast_length: forecast length set in the task parameters
    :param horizon: number of trailing elements excluded from training
    :return: (train_input, predict_input) pair
    """
    # 0..30 followed by an outlier value
    ts = np.array(list(range(31)) + [101])

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=forecast_length))

    # Cut off the last `horizon` elements to avoid a data leak in training
    train_series = ts[:-horizon]
    train_input = InputData(idx=np.arange(len(train_series)),
                            features=train_series,
                            target=train_series,
                            task=task,
                            data_type=DataTypesEnum.ts)

    # The full series is supplied as features for the prediction stage
    forecast_idx = np.arange(len(train_series),
                             len(train_series) + forecast_length)
    predict_input = InputData(idx=forecast_idx,
                              features=ts,
                              target=None,
                              task=task,
                              data_type=DataTypesEnum.ts)

    return train_input, predict_input
Exemple #3
0
def get_synthetic_ts_data_period(n_steps=1000,
                                 forecast_length=1,
                                 max_window_size=50):
    """ Generate an ARMA-based series with a weak linear trend and a
    sinusoidal periodic component and split it into train/test data. """
    steps = np.arange(0, n_steps)
    shifted_steps = np.arange(0, n_steps) + 1

    # ARMA noise + weak linear trend + periodicity
    signal = ArmaProcess().generate_sample(nsample=n_steps)
    signal = signal + steps * 0.0005 - shifted_steps * 0.0001
    signal = signal + np.sin(steps / 50)

    task = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False))

    data = InputData(idx=np.arange(0, n_steps),
                     features=np.asarray([steps, shifted_steps]).T,
                     target=signal,
                     task=task,
                     data_type=DataTypesEnum.ts)

    return train_test_data_setup(data)
Exemple #4
0
def test_lagged_with_invalid_params_fit_correctly():
    """ Define a chain with an incorrect 'window_size' parameter in the
    lagged transformation; the parameter is expected to be corrected
    automatically while the chain is trained.
    """
    window_size = 600
    len_forecast = 50

    # The source time series contains only 500 elements
    file_path = os.path.join(str(project_root()),
                             'test/data/short_time_series.csv')
    time_series = np.array(pd.read_csv(file_path)['sea_height'])

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=len_forecast))

    train_input = InputData(idx=np.arange(len(time_series)),
                            features=time_series,
                            target=time_series,
                            task=task,
                            data_type=DataTypesEnum.ts)

    # Chain with a lagged transformation whose window is oversized;
    # fitting must still succeed
    chain = get_ts_chain(window_size)
    chain.fit(train_input)

    assert True
Exemple #5
0
def run_ts_forecasting_example(with_plot=True,
                               with_pipeline_vis=True,
                               timeout=None):
    """ Fit a FEDOT AutoML model on the salaries dataset, print its
    metrics and return the obtained forecast. """
    data_path = f'{fedot_project_root()}/examples/data/salaries.csv'
    target = pd.read_csv(data_path)['target']

    # Forecast horizon used in the task parameters
    horizon = 30
    task_parameters = TsForecastingParams(forecast_length=horizon)

    # Init model for the time series forecasting problem
    model = Fedot(problem='ts_forecasting',
                  task_params=task_parameters,
                  timeout=timeout)

    # Run AutoML pipeline design
    obtained_pipeline = model.fit(features=data_path, target='target')
    if with_pipeline_vis:
        obtained_pipeline.show()

    # Use the fitted model to obtain the forecast
    forecast = model.predict(features=data_path)

    metrics = model.get_metrics(metric_names=['rmse', 'mae', 'mape'],
                                target=target)
    print(metrics)

    if with_plot:
        model.plot_prediction()

    return forecast
Exemple #6
0
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     is_visualise=False,
                                     timeout=5):
    """ Fit FEDOT on metocean data with an exogenous variable and
    return the metric against the observed values. """
    ssh_history, ws_history, ssh_obs = prepare_input_data(
        train_file_path, test_file_path)

    # 'ssh' is the target series, 'ws' an additional (exogenous) one
    historical_data = {'ws': ws_history, 'ssh': ssh_history}

    fedot = Fedot(
        problem='ts_forecasting',
        task_params=TsForecastingParams(forecast_length=forecast_length),
        timeout=timeout,
        verbose_level=4)

    obtained_pipeline = fedot.fit(features=historical_data,
                                  target=ssh_history)
    fedot.forecast(historical_data, forecast_length=forecast_length)
    metric = fedot.get_metrics(target=ssh_obs)

    if is_visualise:
        obtained_pipeline.show()
        fedot.plot_prediction()

    return metric
Exemple #7
0
    def __chain_fit_predict(self, timeseries_train: np.array, len_gap: int):
        """
        Fit the internal chain on the given part of the series and then
        forecast the next ``len_gap`` elements.

        :param timeseries_train: part of the time series used for training
        :param len_gap: number of elements in the gap to predict
        :return: one-dimensional array with the predicted values
        """
        task = Task(TaskTypesEnum.ts_forecasting,
                    TsForecastingParams(forecast_length=len_gap))

        train_len = len(timeseries_train)
        fit_data = InputData(idx=np.arange(train_len),
                             features=timeseries_train,
                             target=timeseries_train,
                             task=task,
                             data_type=DataTypesEnum.ts)

        # Train the chain from scratch on the available history
        self.chain.fit_from_scratch(fit_data)

        # "Test" data that only defines the indices of the forecast
        forecast_idx = np.arange(train_len, train_len + len_gap)
        forecast_data = InputData(idx=forecast_idx,
                                  features=timeseries_train,
                                  target=None,
                                  task=task,
                                  data_type=DataTypesEnum.ts)

        output = self.chain.predict(forecast_data)
        return np.ravel(np.array(output.predict))
Exemple #8
0
def get_synthetic_ts_data_period(n_steps=6000,
                                 forecast_length=1,
                                 max_window_size=50,
                                 with_exog: bool = True) -> InputData:
    """ Build a synthetic periodic series (trend + sine + noise) as
    InputData, optionally with two exogenous feature columns that also
    cover the forecast horizon. """
    x1 = np.arange(0, n_steps) / 10
    x2 = np.arange(0, n_steps) + 1

    # Exogenous variables are longer: they span the forecast horizon too
    x1_exog = np.arange(0, n_steps + forecast_length) / 10
    x2_exog = np.arange(0, n_steps + forecast_length) + 1

    # Linear trend + periodic component + gaussian noise
    target = (x1 * 0.005 - x2 * 0.001
              + np.sin(x1 * 0.4)
              + np.random.normal(0, 0.1, n_steps))

    task = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False,
                            make_future_prediction=True))

    # Without exogenous data the features column is omitted entirely
    exog_features = np.asarray([x1_exog, x2_exog]).T if with_exog else None

    return InputData(idx=np.arange(0, n_steps),
                     features=exog_features,
                     target=target,
                     task=task,
                     data_type=DataTypesEnum.ts)
Exemple #9
0
def prepare_input_data(len_forecast, train_data_features, train_data_target,
                       test_data_features):
    """ Prepare InputData objects for fitting and forecasting.

    :param len_forecast: forecast length
    :param train_data_features: series used as predictors for training
    :param train_data_target: series used as target for training
    :param test_data_features: series used as predictors for prediction

    :return train_input: InputData for fit
    :return predict_input: InputData for predict
    :return task: ts forecasting Task with its parameters
    """
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=len_forecast))

    n_train = len(train_data_features)
    train_input = InputData(idx=np.arange(n_train),
                            features=train_data_features,
                            target=train_data_target,
                            task=task,
                            data_type=DataTypesEnum.ts)

    # Forecast indices continue right after the training sample
    predict_input = InputData(idx=np.arange(n_train, n_train + len_forecast),
                              features=test_data_features,
                              target=None,
                              task=task,
                              data_type=DataTypesEnum.ts)

    return train_input, predict_input, task
Exemple #10
0
def test_multivariate_ts():
    """ Smoke test: FEDOT must produce a forecast for multivariate
    metocean data with an exogenous variable. """
    forecast_length = 1
    root = str(fedot_project_root())

    full_path_train = os.path.join(
        root, 'cases/data/metocean/metocean_data_train.csv')
    # Dataset for the final validation of the composed model
    full_path_test = os.path.join(
        root, 'cases/data/metocean/metocean_data_test.csv')

    target_history, add_history, obs = prepare_input_data(
        full_path_train, full_path_test)

    historical_data = {
        'ws': add_history,      # additional variable
        'ssh': target_history,  # target variable
    }

    fedot = Fedot(
        problem='ts_forecasting',
        composer_params=composer_params,
        task_params=TsForecastingParams(forecast_length=forecast_length))
    fedot.fit(features=historical_data, target=target_history)
    forecast = fedot.forecast(historical_data,
                              forecast_length=forecast_length)
    assert forecast is not None
Exemple #11
0
def prepare_train_test_input(train_part, len_forecast):
    """ Prepare InputData for the fit and predict stages.

    :param train_part: time series used both as predictors and target
    :param len_forecast: forecast length

    :return train_input: InputData for fit
    :return predict_input: InputData for predict
    :return task: ts forecasting Task with its parameters
    """
    # Define the task to solve
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=len_forecast))

    n_train = len(train_part)
    train_input = InputData(idx=np.arange(n_train),
                            features=train_part,
                            target=train_part,
                            task=task,
                            data_type=DataTypesEnum.ts)

    # Prediction indices follow immediately after the training sample
    predict_input = InputData(idx=np.arange(n_train, n_train + len_forecast),
                              features=train_part,
                              target=None,
                              task=task,
                              data_type=DataTypesEnum.ts)

    return train_input, predict_input, task
Exemple #12
0
def test_pipeline_with_wrong_data():
    """ Fitting a pipeline on a series shorter than the forecast
    horizon requires must raise ValueError. """
    simple_pipeline = Pipeline(PrimaryNode('linear'))
    sequence = np.arange(0, 10)
    # Forecast length equals the series length - not enough data
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=10))

    wrong_data = InputData(idx=sequence, features=sequence, target=sequence,
                           data_type=DataTypesEnum.ts, task=task)

    with pytest.raises(ValueError):
        simple_pipeline.fit(wrong_data)
Exemple #13
0
def test_api_forecast_correct(task_type: str = 'ts_forecasting'):
    """ The produced forecast must match the requested length of 12
    and yield a non-negative RMSE. """
    forecast_length = 12
    train_data, test_data, _ = get_dataset(task_type)

    api_model = Fedot(problem='ts_forecasting', composer_params=composer_params,
                      task_params=TsForecastingParams(forecast_length=forecast_length))
    api_model.fit(features=train_data)

    ts_forecast = api_model.predict(features=train_data)
    metric = api_model.get_metrics(target=test_data.target, metric_names='rmse')

    assert len(ts_forecast) == forecast_length
    assert metric['rmse'] >= 0
Exemple #14
0
def test_chain_with_wrong_data():
    """ Fitting a chain whose max_window_size exceeds the series
    length must raise ValueError. """
    simple_chain = Chain(PrimaryNode('linear'))
    sequence = np.arange(0, 10)
    # The window size deliberately exceeds the series length
    task = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=10,
                            max_window_size=len(sequence) + 1,
                            return_all_steps=False))

    wrong_data = InputData(idx=sequence,
                           features=sequence,
                           target=sequence,
                           data_type=DataTypesEnum.ts,
                           task=task)

    with pytest.raises(ValueError):
        simple_chain.fit(wrong_data)
Exemple #15
0
def test_api_forecast_numpy_input_with_static_model_correct(task_type: str = 'ts_forecasting'):
    """ Forecasting with a predefined lagged->linear chain through the
    API must produce a forecast of the requested length. """
    forecast_length = 10
    train_data, test_data, _ = get_dataset(task_type)
    api_model = Fedot(problem='ts_forecasting',
                      task_params=TsForecastingParams(forecast_length=forecast_length))

    # Predefined chain: lagged transformation followed by a linear model
    lagged_node = PrimaryNode('lagged')
    static_chain = Chain(SecondaryNode('linear', nodes_from=[lagged_node]))

    api_model.fit(features=train_data.features,
                  target=train_data.target,
                  predefined_model=static_chain)
    ts_forecast = api_model.predict(features=train_data)
    metric = api_model.get_metrics(target=test_data.target, metric_names='rmse')

    assert len(ts_forecast) == forecast_length
    assert metric['rmse'] >= 0
Exemple #16
0
def make_forecast(df, len_forecast: int, time_series_label: str):
    """
    Function for making a time series forecast with the FEDOT framework

    :param df: dataframe to process
    :param len_forecast: forecast length
    :param time_series_label: name of time series to process

    :return predicted_values: forecast
    :return model_name: name of the model (always 'FEDOT')
    """

    # Define parameters
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=len_forecast))

    # Init model for the time series forecasting
    model = Fedot(problem='ts_forecasting',
                  task_params=task.task_params,
                  composer_params={
                      'timeout': 1,
                      'preset': 'ultra_light_tun'
                  },
                  preset='ultra_light_tun')

    # Convert the series once instead of repeating it for every InputData
    ts_values = np.array(df[time_series_label])
    input_data = InputData(idx=np.arange(0, len(df)),
                           features=ts_values,
                           target=ts_values,
                           task=task,
                           data_type=DataTypesEnum.ts)

    start_forecast = len(df)
    end_forecast = start_forecast + len_forecast
    predict_input = InputData(idx=np.arange(start_forecast, end_forecast),
                              features=ts_values,
                              target=ts_values,
                              task=task,
                              data_type=DataTypesEnum.ts)

    # Run AutoML model design; the composed pipeline itself is not needed here
    model.fit(features=input_data)
    predicted_values = model.predict(predict_input)

    model_name = 'FEDOT'
    return predicted_values, model_name
Exemple #17
0
def synthetic_with_exogenous_ts():
    """ Return InputData instances for a forecasting task with an
    exogenous variable, plus the hold-out test values. """
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=forecast_length))

    # Main series and its exogenous companion
    ts_train = np.array(
        [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130])
    ts_exog = np.array(
        [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23])

    ts_test = np.array([140, 150, 160, 170])
    ts_test_exog = np.array([24, 25, 26, 27])

    # Train indices and forecast indices (right after the training part)
    train_idx = np.arange(0, len(ts_train))
    forecast_idx = np.arange(len(ts_train), len(ts_train) + forecast_length)

    # Inputs for the source time series
    train_source_ts = InputData(idx=train_idx,
                                features=ts_train,
                                target=ts_train,
                                task=task,
                                data_type=DataTypesEnum.ts)
    predict_source_ts = InputData(idx=forecast_idx,
                                  features=ts_train,
                                  target=None,
                                  task=task,
                                  data_type=DataTypesEnum.ts)

    # Inputs for the exogenous variable
    train_exog_ts = InputData(idx=train_idx,
                              features=ts_exog,
                              target=ts_train,
                              task=task,
                              data_type=DataTypesEnum.ts)
    predict_exog_ts = InputData(idx=forecast_idx,
                                features=ts_test_exog,
                                target=None,
                                task=task,
                                data_type=DataTypesEnum.ts)
    return train_source_ts, predict_source_ts, train_exog_ts, predict_exog_ts, ts_test
Exemple #18
0
def prepare_input_data(train_file_path, test_file_path, forecast_length):
    """ Prepare InputData for training and validating the algorithm.

    :param train_file_path: path to the csv file for training
    :param test_file_path: path to the csv file for validation
    :param forecast_length: forecast length for prediction

    :return dataset_to_train: InputData for train
    :return dataset_to_validate: InputData for validation
    """
    # Define the task to solve
    task_to_solve = Task(TaskTypesEnum.ts_forecasting,
                         TsForecastingParams(forecast_length=forecast_length))

    root = str(project_root())
    df_train = pd.read_csv(os.path.join(root, train_file_path))
    df_test = pd.read_csv(os.path.join(root, test_file_path))

    # Wind speed acts as the feature series, sea height as the target
    train_feature_ts = np.ravel(np.array(df_train['wind_speed']))
    train_target_ts = np.ravel(np.array(df_train['sea_height']))
    n_train = len(train_feature_ts)
    dataset_to_train = InputData(idx=np.arange(0, n_train),
                                 features=train_feature_ts,
                                 target=train_target_ts,
                                 task=task_to_solve,
                                 data_type=DataTypesEnum.ts)

    # Validation target: first `forecast_length` points of the test file
    test_target_ts = np.ravel(np.array(df_test['sea_height']))[:forecast_length]
    idx_test = np.arange(n_train, n_train + forecast_length)
    dataset_to_validate = InputData(idx=idx_test,
                                    features=train_feature_ts,
                                    target=test_target_ts,
                                    task=task_to_solve,
                                    data_type=DataTypesEnum.ts)

    return dataset_to_train, dataset_to_validate
Exemple #19
0
def get_synthetic_ts_data_linear(n_steps=1000,
                                 forecast_length=1,
                                 max_window_size=50):
    """ Build a strictly linear synthetic series and split it into
    train/test parts without shuffling. """
    # Series 0.0, 1.0, ..., n_steps-1 as floats
    linear_series = np.arange(0, n_steps, dtype=float)

    task = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False,
                            make_future_prediction=False))

    data = InputData(idx=np.arange(0, n_steps),
                     features=linear_series,
                     target=linear_series,
                     task=task,
                     data_type=DataTypesEnum.ts)

    return train_test_data_setup(data, shuffle_flag=False)
Exemple #20
0
def run_metocean_forecasting_problem(train_file_path,
                                     test_file_path,
                                     forecast_length=1,
                                     max_window_size=32,
                                     is_visualise=False):
    """ Compare a simple linear chain with a composite LSTM chain on
    the metocean forecasting problem; return the simple chain's RMSE. """
    # Define the task to solve
    task_to_solve = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size))

    root = str(project_root())
    dataset_to_train = InputData.from_csv(os.path.join(root, train_file_path),
                                          task=task_to_solve,
                                          data_type=DataTypesEnum.ts)
    # Dataset used for the final validation of the composed model
    dataset_to_validate = InputData.from_csv(os.path.join(root, test_file_path),
                                             task=task_to_solve,
                                             data_type=DataTypesEnum.ts)

    # Baseline: a single linear model
    chain_simple = TsForecastingChain(PrimaryNode('linear'))
    chain_simple.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_simple = calculate_validation_metric(
        chain_simple.predict(dataset_to_validate),
        dataset_to_validate,
        f'full-simple_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE simple: {rmse_on_valid_simple}')

    # Composite chain with an LSTM component
    chain_composite_lstm = get_composite_chain()
    chain_composite_lstm.fit(input_data=dataset_to_train, verbose=False)
    rmse_on_valid_lstm_only = calculate_validation_metric(
        chain_composite_lstm.predict(dataset_to_validate),
        dataset_to_validate,
        f'full-lstm-only_{forecast_length}',
        is_visualise=is_visualise)
    print(f'RMSE LSTM composite: {rmse_on_valid_lstm_only}')

    return rmse_on_valid_simple
Exemple #21
0
def get_synthetic_ts_data_period(n_steps=1000, forecast_length=5):
    """ Generate an ARMA-based synthetic series with a weak linear trend
    and sinusoidal periodicity and split it into train/test InputData.

    :param n_steps: number of points in the generated series
    :param forecast_length: forecast length for the task parameters
    :return: (train, test) pair produced by train_test_data_setup
    """
    simulated_data = ArmaProcess().generate_sample(nsample=n_steps)
    x1 = np.arange(0, n_steps)
    x2 = np.arange(0, n_steps) + 1

    # Weak linear trend
    simulated_data = simulated_data + x1 * 0.0005 - x2 * 0.0001

    # Sinusoidal periodic component
    periodicity = np.sin(x1 / 50)
    simulated_data = simulated_data + periodicity

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=forecast_length))

    data = InputData(idx=np.arange(0, n_steps),
                     features=simulated_data,
                     target=simulated_data,
                     task=task,
                     data_type=DataTypesEnum.ts)
    # Split only once: the original code called train_test_data_setup
    # twice and discarded the first result
    return train_test_data_setup(data)
Exemple #22
0
def get_ts_data(n_steps=80, forecast_length=5):
    """ Load a time series from a csv file, keep the requested number
    of points and split it for forecasting.

    :param n_steps: number of elements in the time series to take
    :param forecast_length: the length of the forecast
    """
    file_path = os.path.join(str(fedot_project_root()),
                             'test/data/simple_time_series.csv')
    df = pd.read_csv(file_path)

    # Keep only the first n_steps points of the series
    time_series = np.array(df['sea_height'])[:n_steps]
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=forecast_length))

    data = InputData(idx=np.arange(len(time_series)),
                     features=time_series,
                     target=time_series,
                     task=task,
                     data_type=DataTypesEnum.ts)
    return train_test_data_setup(data)
Exemple #23
0
def test_api_cv_correct():
    """ The composer must work correctly when cross validation is
    enabled for time series through the api. """
    folds = 2
    _, forecast_len, validation_blocks, time_series = configure_experiment()
    composer_params = {'max_depth': 1,
                       'max_arity': 2,
                       'timeout': 0.05,
                       'preset': 'ultra_light',
                       'cv_folds': folds,
                       'validation_blocks': validation_blocks}
    task_parameters = TsForecastingParams(forecast_length=forecast_len)

    api_model = Fedot(problem='ts_forecasting',
                      composer_params=composer_params,
                      task_params=task_parameters,
                      verbose_level=2)
    # The fit call must complete without raising
    api_model.fit(features=time_series)
    assert True
    def _chain_fit_predict(self, timeseries_train: np.array, len_gap: int,
                           max_window_size: int):
        """
        Fit the internal chain on the known part of the series and
        forecast the next ``len_gap`` elements.

        :param timeseries_train: part of the time series for training the model
        :param len_gap: number of elements in the gap
        :param max_window_size: window length
        :return: array without gaps
        """
        task = Task(
            TaskTypesEnum.ts_forecasting,
            TsForecastingParams(forecast_length=len_gap,
                                max_window_size=max_window_size,
                                return_all_steps=False,
                                make_future_prediction=True))

        history = InputData(idx=np.arange(0, len(timeseries_train)),
                            features=None,
                            target=timeseries_train,
                            task=task,
                            data_type=DataTypesEnum.ts)

        # Train the chain on the known part of the series
        self.chain.fit_from_scratch(history)

        # Placeholder data that only fixes the forecast length
        placeholder = InputData(idx=np.arange(0, len_gap),
                                features=None,
                                target=None,
                                task=task,
                                data_type=DataTypesEnum.ts)

        forecast = self.chain.forecast(initial_data=history,
                                       supplementary_data=placeholder)
        return forecast.predict
Exemple #25
0
def get_synthetic_ts_data_custom(n_steps=6000,
                                 forecast_length=2,
                                 max_window_size=2,
                                 with_exog: bool = True) -> InputData:
    """ Build a tiny hand-crafted InputData instance for forecasting
    tests, optionally with exogenous features. """
    task = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=forecast_length,
                            max_window_size=max_window_size,
                            return_all_steps=False,
                            make_future_prediction=True))

    if with_exog:
        exog_features = np.asarray(
            [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0])
    else:
        # Without exogenous data the target alone is used
        exog_features = None

    return InputData(idx=np.arange(0, n_steps),
                     features=exog_features,
                     target=np.asarray([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
                     task=task,
                     data_type=DataTypesEnum.ts)
Exemple #26
0
def test_ts_single_chain_model_without_multiotput_support():
    """ Regression models without multi-output support must still give
    a sane forecast when used as single-node forecasting chains. """
    time_series = generate_synthetic_data(10)
    len_forecast = 2
    train_part = time_series[:-len_forecast]
    test_part = time_series[-len_forecast:]

    task = Task(
        TaskTypesEnum.ts_forecasting,
        TsForecastingParams(forecast_length=len_forecast,
                            max_window_size=2,
                            return_all_steps=False,
                            make_future_prediction=True))

    train_data = InputData(idx=np.arange(len(train_part)),
                           features=None,
                           target=train_part,
                           task=task,
                           data_type=DataTypesEnum.ts)

    for model_id in ['xgbreg', 'gbr', 'adareg', 'svr', 'sgdr']:
        chain = TsForecastingChain(PrimaryNode(model_id))

        # Fit from scratch for every candidate model
        chain.fit_from_scratch(train_data)

        # Placeholder input that only fixes the forecast length
        test_data = InputData(idx=np.arange(len_forecast),
                              features=None,
                              target=None,
                              task=task,
                              data_type=DataTypesEnum.ts)

        predicted_values = chain.forecast(initial_data=train_data,
                                          supplementary_data=test_data).predict

        mae = mean_absolute_error(test_part, predicted_values)
        assert mae < 50
Exemple #27
0
def test_ts_single_pipeline_model_without_multiotput_support():
    """ Regression models without multi-output support must still
    forecast when wrapped in a simple lagged pipeline. """
    time_series = generate_synthetic_data(20)
    len_forecast = 2
    train_part = time_series[:-len_forecast]
    test_part = time_series[-len_forecast:]

    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=len_forecast))

    train_data = InputData(idx=np.arange(len(train_part)),
                           features=train_part,
                           target=train_part,
                           task=task,
                           data_type=DataTypesEnum.ts)

    # Indices for the prediction interval, right after the train part
    idx_for_predict = np.arange(len(train_part),
                                len(train_part) + len_forecast)

    test_data = InputData(idx=idx_for_predict,
                          features=train_part,
                          target=test_part,
                          task=task,
                          data_type=DataTypesEnum.ts)

    for model_id in ['xgbreg', 'gbr', 'adareg', 'svr', 'sgdr']:
        pipeline = get_simple_ts_pipeline(model_root=model_id, window_size=2)

        # Fit from scratch for every candidate model
        pipeline.fit_from_scratch(train_data)
        prediction = pipeline.predict(test_data)
        pipeline_forecast = np.ravel(np.array(prediction.predict))

        test_part = np.ravel(np.array(test_part))
        mae = mean_absolute_error(test_part, pipeline_forecast)
        assert mae < 50
Exemple #28
0
def synthetic_univariate_ts(forecast_length=4):
    """ Method returns InputData for classical time series forecasting task.

    Originally the horizon was read from an undefined free variable
    ``forecast_length``; it is now an explicit parameter whose default (4)
    matches the length of the hold-out ``ts_test`` array, so existing
    zero-argument callers keep working.

    :param forecast_length: number of elements to forecast
    :return: tuple (train_input, predict_input, ts_test)
    """
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=forecast_length))
    # Simple time series to process
    ts_train = np.array(
        [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130])
    ts_test = np.array([140, 150, 160, 170])

    # Prepare train data
    train_input = InputData(idx=np.arange(0, len(ts_train)),
                            features=ts_train,
                            target=ts_train,
                            task=task,
                            data_type=DataTypesEnum.ts)

    start_forecast = len(ts_train)
    end_forecast = start_forecast + forecast_length
    predict_input = InputData(idx=np.arange(start_forecast, end_forecast),
                              features=ts_train,
                              target=None,
                              task=task,
                              data_type=DataTypesEnum.ts)
    return train_input, predict_input, ts_test
Exemple #29
0
    def _chain_fit_predict(self, timeseries_train: np.array, len_gap: int,
                           max_window_size: int):
        """
        Predict a sequence of elements from a training sample in two stages:
        compose and fit a chain on the training part of the series, then
        forecast the next len_gap values.

        :param timeseries_train: part of the time series for training the model
        :param len_gap: number of elements in the gap
        :param max_window_size: window length
        :return: array without gaps
        """

        # Forecasting task configured to predict exactly the gap length
        forecasting_task = Task(
            TaskTypesEnum.ts_forecasting,
            TsForecastingParams(forecast_length=len_gap,
                                max_window_size=max_window_size,
                                return_all_steps=False,
                                make_future_prediction=True))

        train_data = InputData(idx=np.arange(0, len(timeseries_train)),
                               features=None,
                               target=timeseries_train,
                               task=forecasting_task,
                               data_type=DataTypesEnum.ts)

        quality_metric = MetricsRepository().metric_by_id(
            RegressionMetricsEnum.RMSE)

        # Model pools for the primary and secondary levels of the chain
        primary_models = [
            'linear', 'lasso', 'ridge', 'trend_data_model',
            'residual_data_model'
        ]
        secondary_models = [
            'rfr', 'linear', 'knnreg', 'gbr', 'ridge', 'lasso', 'svr'
        ]

        requirements = GPComposerRequirements(
            primary=primary_models,
            secondary=secondary_models,
            max_arity=3,
            max_depth=4,
            pop_size=5,
            num_of_generations=5,
            crossover_prob=0.1,
            mutation_prob=0.8,
            max_lead_time=datetime.timedelta(minutes=20))

        composer = FixedStructureComposerBuilder(task=forecasting_task) \
            .with_requirements(requirements) \
            .with_metrics(quality_metric) \
            .with_initial_chain(self.chain) \
            .build()

        composed_chain = composer.compose_chain(data=train_data,
                                                is_visualise=False)

        # Re-tag the composed chain so the ts-specific forecast API is available
        composed_chain.__class__ = TsForecastingChain
        composed_chain.fit_from_scratch(train_data)

        print(f'\n Размер полученной цепочки {len(composed_chain.nodes)} \n')

        # "Test data" stub that only carries the indices to predict
        forecast_data = InputData(idx=np.arange(0, len_gap),
                                  features=None,
                                  target=None,
                                  task=forecasting_task,
                                  data_type=DataTypesEnum.ts)

        gap_forecast = composed_chain.forecast(
            initial_data=train_data,
            supplementary_data=forecast_data).predict
        return gap_forecast
def forecasting_accuracy(path, prediction_len, vis=True):
    """ Compare forecasting quality of chains trained on series restored by
    different gap-filling methods.

    For each dataset the function loads the original series with gaps and
    several gap-filled versions (simple interpolation, R-based methods,
    FEDOT-based methods), fits the same two-branch chain on every restored
    series, forecasts the last ``prediction_len`` values and prints
    MAE / RMSE / MedianAE / MAPE per method. Per-method MAPE averages are
    printed at the end.

    :param path: directory with the source csv files and one subdirectory
        per gap-filling method
    :param prediction_len: forecast horizon (number of values to predict)
    :param vis: if True, show intermediate matplotlib plots
    """
    mapes_per_model = []
    models = []
    files = []

    for file_name in ['Synthetic.csv', 'Sea_hour.csv', 'Sea_10_240.csv']:
        # Source file with gaps
        gap_path = os.path.join(path, file_name)
        gap_df = pd.read_csv(gap_path)
        gap_df['Date'] = pd.to_datetime(gap_df['Date'])

        # Simple gap-filling methods
        linear_path = os.path.join(path, 'linear', file_name)
        linear_df = pd.read_csv(linear_path)
        local_poly_path = os.path.join(path, 'poly', file_name)
        local_poly_df = pd.read_csv(local_poly_path)
        batch_poly_path = os.path.join(path, 'batch_poly', file_name)
        batch_poly_df = pd.read_csv(batch_poly_path)

        # Gap-filling methods implemented in the R language
        kalman_path = os.path.join(path, 'kalman', file_name)
        kalman_df = pd.read_csv(kalman_path)
        ma_path = os.path.join(path, 'ma', file_name)
        ma_df = pd.read_csv(ma_path)
        spline_path = os.path.join(path, 'spline', file_name)
        spline_df = pd.read_csv(spline_path)

        # FEDOT-based gap-filling methods
        fedot_ridge_30_path = os.path.join(path, 'fedot_ridge_30', file_name)
        fedot_ridge_30_df = pd.read_csv(fedot_ridge_30_path)
        fedot_ridge_100_path = os.path.join(path, 'fedot_ridge_100', file_name)
        fedot_ridge_100_df = pd.read_csv(fedot_ridge_100_path)
        fedot_compose = os.path.join(path, 'fedot_composing', file_name)
        fedot_compose_df = pd.read_csv(fedot_compose)

        # Original time series without gaps
        arr_parameter = np.array(gap_df['Height'])
        # Time series with gaps (gap elements are marked with -100.0)
        arr_mask = np.array(gap_df['gap'])
        ids_gaps = np.ravel(np.argwhere(arr_mask == -100.0))

        array_gaps = np.ma.masked_where(arr_mask == -100.0, arr_mask)

        if vis:
            plt.plot(gap_df['Date'], arr_parameter, c='red', alpha=0.2)
            for index in ids_gaps:
                plt.plot([gap_df['Date'][index], gap_df['Date'][index]], [min(arr_parameter), arr_parameter[index]],
                         c='red', alpha=0.05)
            plt.plot(gap_df['Date'], array_gaps, c='blue', alpha=1.0)
            plt.ylabel('Sea level, m', fontsize=15)
            plt.xlabel('Date', fontsize=15)
            plt.grid()
            plt.show()

        withoutgap_arr_linear = np.array(linear_df['gap'])
        withoutgap_arr_local = np.array(local_poly_df['gap'])
        withoutgap_arr_batch = np.array(batch_poly_df['gap'])

        withoutgap_arr_kalman = np.array(kalman_df['gap'])
        withoutgap_arr_ma = np.array(ma_df['gap'])
        withoutgap_arr_spline = np.array(spline_df['gap'])

        withoutgap_arr_ridge_30 = np.array(fedot_ridge_30_df['gap'])
        withoutgap_arr_ridge_100 = np.array(fedot_ridge_100_df['gap'])
        withoutgap_arr_compose = np.array(fedot_compose_df['gap'])

        if vis:
            plt.plot(gap_df['Date'], arr_parameter, c='green', alpha=0.5,
                     label='Actual values')
            plt.plot(gap_df['Date'], withoutgap_arr_linear, c='red', alpha=0.5,
                     label='Linear interpolation')
            plt.plot(gap_df['Date'], withoutgap_arr_local, c='orange', alpha=0.5,
                     label='Local polynomial approximation')
            plt.plot(gap_df['Date'], withoutgap_arr_batch, c='purple', alpha=0.5,
                     label='Batch polynomial approximation')
            plt.plot(gap_df['Date'], array_gaps, c='blue', alpha=1.0)
            plt.ylabel('Sea level, m', fontsize=15)
            plt.xlabel('Date', fontsize=15)
            plt.grid()
            plt.legend(fontsize=15)
            plt.show()

            plt.plot(gap_df['Date'], arr_parameter, c='green', alpha=0.5,
                     label='Actual values')
            plt.plot(gap_df['Date'], withoutgap_arr_kalman, c='red', alpha=0.5,
                     label='Kalman filtering')
            plt.plot(gap_df['Date'], withoutgap_arr_ma, c='orange', alpha=0.5,
                     label='Moving average')
            plt.plot(gap_df['Date'], withoutgap_arr_spline, c='purple', alpha=0.5,
                     label='Spline interpolation')
            plt.plot(gap_df['Date'], array_gaps, c='blue', alpha=1.0)
            plt.ylabel('Sea level, m', fontsize=15)
            plt.xlabel('Date', fontsize=15)
            plt.grid()
            plt.legend(fontsize=15)
            plt.show()

            plt.plot(gap_df['Date'], arr_parameter, c='green', alpha=0.5,
                     label='Actual values')
            plt.plot(gap_df['Date'], withoutgap_arr_batch, c='red',
                     alpha=0.5,
                     label='Batch polynomial approximation')
            plt.plot(gap_df['Date'], withoutgap_arr_kalman, c='orange', alpha=0.5,
                     label='Kalman filtering')
            plt.plot(gap_df['Date'], withoutgap_arr_ridge_30, c='purple', alpha=0.5,
                     label='Ridge 30 ws')
            plt.plot(gap_df['Date'], array_gaps, c='blue', alpha=1.0)
            plt.ylabel('Sea level, m', fontsize=15)
            plt.xlabel('Date', fontsize=15)
            plt.grid()
            plt.legend(fontsize=15)
            plt.show()

        train_part = arr_parameter[:-prediction_len]
        test_part = arr_parameter[-prediction_len:]

        # Prepare the training part of each restored time series
        train_part_linear = withoutgap_arr_linear[:-prediction_len]
        train_part_local = withoutgap_arr_local[:-prediction_len]
        train_part_batch = withoutgap_arr_batch[:-prediction_len]

        train_part_kalman = withoutgap_arr_kalman[:-prediction_len]
        train_part_ma = withoutgap_arr_ma[:-prediction_len]
        train_part_stine = withoutgap_arr_spline[:-prediction_len]

        train_part_ridge_30 = withoutgap_arr_ridge_30[:-prediction_len]
        train_part_ridge_100 = withoutgap_arr_ridge_100[:-prediction_len]
        train_part_compose = withoutgap_arr_compose[:-prediction_len]

        # NOTE(review): none of the processed files is named 'Hour_data_m.csv',
        # so this branch never fires and max_window_size is always 500 —
        # possibly 'Sea_hour.csv' was intended; confirm before changing
        if file_name == 'Hour_data_m.csv':
            max_window_size = 50
        else:
            max_window_size = 500
        for sample, model in zip([train_part, train_part_linear, train_part_local, train_part_batch,
                                  train_part_kalman, train_part_ma, train_part_stine, train_part_ridge_30,
                                  train_part_ridge_100, train_part_compose],
                                 ['Original', 'Linear interpolation', 'Local polynomial approximation',
                                  'Batch polynomial approximation', 'Kalman filtering', 'Moving average',
                                  'Spline interpolation', 'Ridge forward 30 ws', 'Ridge forward 100 ws',
                                  'Chain compose']):
            # Same two-branch chain (trend + residual -> svr) for every method
            node_first = PrimaryNode('ridge')
            node_second = PrimaryNode('ridge')
            node_trend_model = SecondaryNode('linear', nodes_from=[node_first])
            node_residual_model = SecondaryNode('linear', nodes_from=[node_second])

            node_final = SecondaryNode('svr', nodes_from=[node_trend_model,
                                                          node_residual_model])
            chain = TsForecastingChain(node_final)

            task = Task(TaskTypesEnum.ts_forecasting,
                        TsForecastingParams(forecast_length=prediction_len,
                                            max_window_size=max_window_size,
                                            return_all_steps=False,
                                            make_future_prediction=True))

            input_data = InputData(idx=np.arange(0, len(sample)),
                                   features=None,
                                   target=sample,
                                   task=task,
                                   data_type=DataTypesEnum.ts)

            chain.fit_from_scratch(input_data)

            # "Test data" for making prediction for a specific length
            test_data = InputData(idx=np.arange(0, prediction_len),
                                  features=None,
                                  target=None,
                                  task=task,
                                  data_type=DataTypesEnum.ts)

            predicted_values = chain.forecast(initial_data=input_data,
                                              supplementary_data=test_data).predict

            print(model)
            MAE = mean_absolute_error(test_part, predicted_values)
            print('Mean absolute error -', round(MAE, 4))

            RMSE = (mean_squared_error(test_part, predicted_values)) ** 0.5
            print('RMSE -', round(RMSE, 4))

            MedianAE = median_absolute_error(test_part, predicted_values)
            print('Median absolute error -', round(MedianAE, 4))

            mape = mean_absolute_percentage_error(test_part, predicted_values)
            print('MAPE -', round(mape, 4), '\n')

            if file_name == 'Sea_10_240.csv':
                plt.plot(gap_df['Date'], arr_parameter, c='green', alpha=0.5, label='Actual values')
                plt.plot(gap_df['Date'][:-prediction_len], sample, c='blue', label='Restored series')
                plt.plot(gap_df['Date'][-prediction_len:], predicted_values, c='red', alpha=0.5, label='Model forecast')
                plt.ylabel('Sea level, m', fontsize=15)
                plt.xlabel('Date', fontsize=15)
                plt.grid()
                plt.title(model, fontsize=15)
                plt.legend(fontsize=15)
                plt.show()

            models.append(model)
            mapes_per_model.append(mape)
            files.append(file_name)

    local_df = pd.DataFrame({'MAPE': mapes_per_model,
                             'Model': models,
                             'File': files})

    for model in local_df['Model'].unique():
        local_local_df = local_df[local_df['Model'] == model]
        mape_arr = np.array(local_local_df['MAPE'])

        print(f'Среднее значение ошибки для модели {model} - {np.mean(mape_arr)}')
        for file in local_local_df['File'].unique():
            l_local_local_df = local_local_df[local_local_df['File'] == file]
            # float(Series) is deprecated/removed in modern pandas; there is
            # exactly one row per (model, file) pair, so take it explicitly
            print(f'{model}, {file}, MAPE - {float(l_local_local_df["MAPE"].iloc[0])}')