Esempio n. 1
0
    def _convert_to_output(
            prediction,
            predict_data: InputData,
            output_data_type: DataTypesEnum = DataTypesEnum.table
    ) -> OutputData:
        """ Wrap a raw model prediction into OutputData unless it already is one

        :param prediction: output from model implementation
        :param predict_data: InputData used for prediction; its idx, features,
        task and target are copied into the wrapper
        :param output_data_type: DataTypesEnum assigned to the wrapped output

        :return : the prediction itself when its exact type is OutputData,
        otherwise a new OutputData built around it
        """

        # Exact type match only: anything else (subclasses included) is wrapped
        if type(prediction) is OutputData:
            return prediction

        return OutputData(idx=predict_data.idx,
                          features=predict_data.features,
                          predict=prediction,
                          task=predict_data.task,
                          target=predict_data.target,
                          data_type=output_data_type)
Esempio n. 2
0
    def _in_sample_prediction(pipeline, data, validation_blocks):
        """ Run in-sample validation of a time series pipeline

        :param pipeline: pipeline passed to in_sample_ts_forecast
        :param data: data whose target holds the series and whose task params
        hold forecast_length
        :param validation_blocks: number of forecast_length-sized blocks to
        validate on

        :return : tuple (reference_data, results): InputData with the actual
        values and OutputData with the predicted ones
        """

        task = data.task
        # Whole validation horizon = forecast length times the number of blocks
        horizon = task.task_params.forecast_length * validation_blocks

        forecast = in_sample_ts_forecast(pipeline=pipeline,
                                         input_data=data,
                                         horizon=horizon)

        # Actual values are the tail of the target covering the horizon
        actual = data.target[-horizon:]

        # Both containers get a fresh zero-based index of their own length
        results = OutputData(idx=np.arange(len(forecast)),
                             features=forecast,
                             predict=forecast,
                             task=task,
                             target=forecast,
                             data_type=DataTypesEnum.ts)
        reference_data = InputData(idx=np.arange(len(actual)),
                                   features=actual,
                                   task=task,
                                   target=actual,
                                   data_type=DataTypesEnum.ts)

        return reference_data, results
Esempio n. 3
0
def test_data_merge_function():
    """ Check that the merge function finds the correct intersection of
    indices. idx_2 is a subset of idx_1, so the merged indices must be idx_2
    """

    idx_1 = list(range(12))
    idx_2 = list(range(2, 10))

    task = Task(TaskTypesEnum.regression)
    generated_target = np.random.sample((len(idx_1), 1))
    generated_features = np.random.sample((len(idx_1), 2))

    # One OutputData per index list, sliced out of the same generated arrays
    list_with_outputs = [
        OutputData(idx=indices,
                   features=generated_features[indices, :],
                   predict=generated_target[indices, :],
                   task=task,
                   target=generated_target[indices, :],
                   data_type=DataTypesEnum.table)
        for indices in (idx_1, idx_2)
    ]

    merged_idx, _, _ = DataMerger(list_with_outputs).merge()

    assert tuple(merged_idx) == tuple(idx_2)
0
 def predict(self, trained_operation, predict_data: InputData,
             is_fit_pipeline_stage: bool):
     """ Pass the input through unchanged: features are returned as the prediction

     :param trained_operation: not used by this implementation
     :param predict_data: InputData whose fields are copied into the output
     :param is_fit_pipeline_stage: not used by this implementation

     :return : OutputData with predict equal to the input features
     """
     passthrough = predict_data.features
     return OutputData(idx=predict_data.idx,
                       features=passthrough,
                       predict=passthrough,
                       task=predict_data.task,
                       target=predict_data.target,
                       data_type=predict_data.data_type)
Esempio n. 5
0
    def predict(self, trained_operation, predict_data: InputData,
                is_fit_chain_stage: bool) -> OutputData:
        """ Run predict_lstm on the trained operation and wrap the result

        :param trained_operation: fitted operation passed to predict_lstm
        :param predict_data: InputData to predict on
        :param is_fit_chain_stage: not used by this implementation

        :return : OutputData of type DataTypesEnum.ts holding the prediction
        """

        forecast = predict_lstm(trained_operation, predict_data)
        return OutputData(idx=predict_data.idx,
                          features=predict_data.features,
                          predict=forecast,
                          task=predict_data.task,
                          target=predict_data.target,
                          data_type=DataTypesEnum.ts)
Esempio n. 6
0
    def predict(self, trained_operation, predict_data: InputData,
                is_fit_chain_stage: bool) -> OutputData:
        """ Predict with the model-specific routine and wrap the result

        :param trained_operation: fitted operation passed to
        _model_specific_predict
        :param predict_data: InputData to predict on
        :param is_fit_chain_stage: not used by this implementation

        :return : tabular OutputData holding the prediction
        """
        model_output = self._model_specific_predict(trained_operation,
                                                    predict_data)

        # The prediction is stored in predict so the next level can consume it
        return OutputData(idx=predict_data.idx,
                          features=predict_data.features,
                          predict=model_output,
                          task=predict_data.task,
                          target=predict_data.target,
                          data_type=DataTypesEnum.table)
Esempio n. 7
0
def output_dataset():
    """ Build a synthetic classification OutputData from random features

    Features are `samples` random values in [-5, 5) stored as a single column.
    The predicted class is 0.0 where 1 / (1 + exp(1 / x)) <= threshold and
    1.0 otherwise.

    :return : tabular OutputData whose idx, features and predict all have
    `samples` rows
    """
    task = Task(TaskTypesEnum.classification)

    samples = 1000
    threshold = 0.5

    x = np.expand_dims(10.0 * np.random.rand(samples) - 5.0, axis=1)
    y = 1.0 / (1.0 + np.exp(np.power(x, -1.0)))
    # y is already a column, so thresholding gives the (samples, 1) labels
    classes = np.where(y <= threshold, 0.0, 1.0)

    # idx needs exactly one entry per row of features/predict
    data = OutputData(idx=np.arange(0, samples),
                      features=x,
                      predict=classes,
                      task=task,
                      data_type=DataTypesEnum.table)

    return data
Esempio n. 8
0
def test_get_compound_mask_correct():
    """ Check that the per-key lists of features_mask are combined
    position by position into the compound mask """

    features_mask = {'input_ids': [0, 0, 1, 1], 'flow_lens': [1, 1, 0, 0]}
    supplementary = SupplementaryData(features_mask=features_mask)

    # Only supplementary_data matters here; the other fields are placeholders
    output_example = OutputData(idx=[0, 0],
                                features=[0, 0],
                                predict=[0, 0],
                                task=Task(TaskTypesEnum.regression),
                                target=[0, 0],
                                data_type=DataTypesEnum.table,
                                supplementary_data=supplementary)

    compound = output_example.supplementary_data.get_compound_mask()

    assert tuple(compound) == ('01', '01', '10', '10')
Esempio n. 9
0
def _convert_to_output_function(input_data,
                                transformed_features,
                                data_type=DataTypesEnum.table):
    """ Pack the result of an operation into an OutputData object

    :param input_data: data with features, target and ids to process
    :param transformed_features: transformed features, stored as predict
    :param data_type: type of output data (tabular unless stated otherwise)

    :return : OutputData with idx, features, task and target taken from
    input_data
    """

    return OutputData(idx=input_data.idx,
                      features=input_data.features,
                      predict=transformed_features,
                      task=input_data.task,
                      target=input_data.target,
                      data_type=data_type)
Esempio n. 10
0
def generate_outputs():
    """ Generate a simple case with two outputs of non-equal length

    :return : tuple (list_with_outputs, idx_1, idx_2), where the first output
    covers idx_1 with data_flow_length 1 and the second covers idx_2 with
    data_flow_length 0
    """
    idx_1 = list(range(12))
    idx_2 = list(range(2, 10))

    task = Task(TaskTypesEnum.regression)
    generated_target = np.random.sample((len(idx_1), 1))
    generated_features = np.random.sample((len(idx_1), 2))

    # Each output is sliced from the same generated arrays by its own indices
    list_with_outputs = [
        OutputData(idx=indices,
                   features=generated_features[indices, :],
                   predict=generated_target[indices, :],
                   task=task,
                   target=generated_target[indices, :],
                   data_type=DataTypesEnum.table,
                   supplementary_data=SupplementaryData(
                       data_flow_length=flow_length))
        for indices, flow_length in zip((idx_1, idx_2), (1, 0))
    ]

    return list_with_outputs, idx_1, idx_2
Esempio n. 11
0
    def forecast(self, initial_data: InputData,
                 supplementary_data: InputData) -> OutputData:
        """Generates the time series forecast with a sliding window using pre-fitted chain.

        The forecast is built step by step: each step predicts on the current
        data, keeps a slice of that prediction and updates the data used by
        the next step. The accumulated prediction is finally cut to the number
        of idx values in supplementary_data.

        :param initial_data: the initial condition for the forecasting (should be greater or equals to max_window_size)
        :param supplementary_data: the data that should be available during the forecast:
            idx for the forecasted steps and optional exogenous variables
            (variables that are received from an external source instead of forecasting in place and
            used as features of the forecasting model to increase the quality of forecast)
        :return: forecasted time series as OutputData; idx, features, task and
            data_type are taken from supplementary_data
        :raises ValueError: if is_all_cache_actual() is False, or if the task
            type of supplementary_data is not ts_forecasting
        """

        if not self.is_all_cache_actual():
            raise ValueError(
                'Chain for the time series forecasting was not fitted yet.')

        if supplementary_data.task.task_type is not TaskTypesEnum.ts_forecasting:
            raise ValueError(
                'TsForecastingChain can be used for the ts_forecasting task only.'
            )

        forecast_length = supplementary_data.task.task_params.forecast_length

        # NOTE(review): copy() looks like a shallow copy (copy.copy). If so,
        # the flag below is set on the task object shared with the caller's
        # data as well - confirm this is intended
        supplementary_data_for_forecast = copy(supplementary_data)
        supplementary_data_for_forecast.task.task_params.make_future_prediction = True

        initial_data_for_forecast = copy(initial_data)
        initial_data_for_forecast.task.task_params.make_future_prediction = True

        # check if predict features contains additional (exogenous) variables
        with_exog = supplementary_data_for_forecast.features is not None

        # initial data for the first prediction:
        # the last max_window_size elements of the initial data
        pre_history_start = (
            len(initial_data_for_forecast.idx) -
            initial_data_for_forecast.task.task_params.max_window_size)
        pre_history_end = len(initial_data_for_forecast.idx)

        data_for_forecast = initial_data_for_forecast.subset(
            start=pre_history_start, end=pre_history_end)

        full_prediction = []
        # number of steps needed to cover every requested idx, rounded up
        forecast_steps_num = int(
            np.ceil(
                len(supplementary_data_for_forecast.idx) / forecast_length))
        for forecast_step in range(forecast_steps_num):
            stepwise_prediction = self.predict(data_for_forecast).predict
            if len(stepwise_prediction.shape) > 1:
                # multi-dim prediction
                # NOTE(review): this keeps the last row WITHOUT its last
                # forecast_length columns, while the single-dim branch keeps
                # the last forecast_length values - confirm the slice direction
                stepwise_prediction = stepwise_prediction[
                    -1, :-forecast_length]
                full_prediction.extend(stepwise_prediction)
            else:
                # single-dim prediction
                stepwise_prediction = list(
                    stepwise_prediction[-forecast_length:])
                full_prediction.extend(stepwise_prediction)

            # add additional variable from external source
            if with_exog:
                data_for_forecast = _prepare_exog_features(
                    data_for_forecast, supplementary_data_for_forecast,
                    stepwise_prediction, forecast_step, forecast_length)
            else:
                # without exogenous variables the predicted values are appended
                # to the target, which is then reused as features
                predicted_ts = np.append(data_for_forecast.target,
                                         stepwise_prediction)
                data_for_forecast.target = np.stack(predicted_ts)
                data_for_forecast.features = data_for_forecast.target

            data_for_forecast.idx = _extend_idx_for_prediction(
                data_for_forecast.idx, forecast_length)

        # the number of steps is rounded up, so drop values beyond the requested idx
        full_prediction = full_prediction[0:len(supplementary_data_for_forecast
                                                .idx)]

        output_data = OutputData(
            idx=supplementary_data_for_forecast.idx,
            features=supplementary_data_for_forecast.features,
            predict=np.asarray(full_prediction),
            task=supplementary_data_for_forecast.task,
            data_type=supplementary_data_for_forecast.data_type)

        return output_data
Esempio n. 12
0
File: node.py Progetto: STATAN/FEDOT
 def output_from_prediction(self, input_data, prediction):
     """ Wrap a prediction into OutputData using the fields of the input data

     :param input_data: data providing idx, features, task and data_type
     :param prediction: value stored as predict

     :return : OutputData whose data_type is what self.model.output_datatype
     returns for the input data type
     """
     output_type = self.model.output_datatype(input_data.data_type)
     return OutputData(idx=input_data.idx,
                       features=input_data.features,
                       predict=prediction,
                       task=input_data.task,
                       data_type=output_type)