def _convert_to_output(prediction, predict_data: InputData,
                       output_data_type: DataTypesEnum = DataTypesEnum.table) -> OutputData:
    """
    Convert a prediction into OutputData if it is not of this type yet.

    :param prediction: output from model implementation
    :param predict_data: InputData used for prediction
    :param output_data_type: DataTypesEnum for output

    :return: the prediction itself when it already is an OutputData,
        otherwise a new OutputData carrying the prediction in ``predict``
    """
    # isinstance (instead of an exact type comparison) lets OutputData
    # subclasses pass through untouched rather than being nested inside a
    # second OutputData as its `predict` payload.
    if isinstance(prediction, OutputData):
        return prediction

    # Wrap the raw prediction, borrowing idx/features/task/target from the input
    return OutputData(idx=predict_data.idx,
                      features=predict_data.features,
                      predict=prediction,
                      task=predict_data.task,
                      target=predict_data.target,
                      data_type=output_data_type)
def _in_sample_prediction(pipeline, data, validation_blocks):
    """
    Perform in-sample pipeline validation for time series prediction.

    :param pipeline: pipeline handed over to ``in_sample_ts_forecast``
    :param data: time series data; ``task.task_params.forecast_length`` and
        ``target`` are read from it
    :param validation_blocks: multiplier applied to the forecast length to
        obtain the validation horizon

    :return: tuple ``(reference_data, results)`` - the tail of the actual
        target wrapped as InputData and the forecast wrapped as OutputData
    """
    # Validation horizon: forecast length repeated for every validation block
    horizon = data.task.task_params.forecast_length * validation_blocks

    forecast = in_sample_ts_forecast(pipeline=pipeline,
                                     input_data=data,
                                     horizon=horizon)

    # Keep only the tail of the actual series covered by the horizon
    actual = data.target[-horizon:]

    # The forecast fills features, predict and target of the OutputData
    forecast_idx = np.arange(len(forecast))
    results = OutputData(idx=forecast_idx,
                         features=forecast,
                         predict=forecast,
                         task=data.task,
                         target=forecast,
                         data_type=DataTypesEnum.ts)

    # The actual values fill both features and target of the reference InputData
    actual_idx = np.arange(len(actual))
    reference_data = InputData(idx=actual_idx,
                               features=actual,
                               task=data.task,
                               target=actual,
                               data_type=DataTypesEnum.ts)

    return reference_data, results
def test_data_merge_function():
    """
    Check that the merge function finds the proper intersection of indices.

    The second index set is a subset of the first one, so the merged
    indices must be equal to the second set.
    """
    wide_idx = list(range(12))
    narrow_idx = list(range(2, 10))

    task = Task(TaskTypesEnum.regression)
    # Random pools sized by the wider index set; each output slices its own rows
    target_pool = np.random.sample((len(wide_idx), 1))
    features_pool = np.random.sample((len(wide_idx), 2))

    outputs = [
        OutputData(idx=indices,
                   features=features_pool[indices, :],
                   predict=target_pool[indices, :],
                   task=task,
                   target=target_pool[indices, :],
                   data_type=DataTypesEnum.table)
        for indices in (wide_idx, narrow_idx)
    ]

    # Only the merged indices are checked; features and target are ignored
    merged_idx, _features, _target = DataMerger(outputs).merge()

    assert tuple(merged_idx) == tuple(narrow_idx)
def predict(self, trained_operation, predict_data: InputData, is_fit_pipeline_stage: bool):
    """
    Return the input data unchanged, wrapped as OutputData.

    :param trained_operation: not used by this implementation
    :param predict_data: InputData whose fields are copied to the output
    :param is_fit_pipeline_stage: not used by this implementation

    :return: OutputData whose ``predict`` is the input ``features``
    """
    # Pass-through: the input features are reported as the prediction
    passthrough = dict(idx=predict_data.idx,
                       features=predict_data.features,
                       task=predict_data.task,
                       data_type=predict_data.data_type,
                       target=predict_data.target,
                       predict=predict_data.features)
    return OutputData(**passthrough)
def predict(self, trained_operation, predict_data: InputData, is_fit_chain_stage: bool) -> OutputData:
    """
    Run ``predict_lstm`` and wrap its result as time series OutputData.

    :param trained_operation: fitted operation handed to ``predict_lstm``
    :param predict_data: InputData to predict on
    :param is_fit_chain_stage: not used by this implementation

    :return: OutputData of type ``DataTypesEnum.ts`` with the prediction
    """
    # The model is called before any field of predict_data is read
    lstm_forecast = predict_lstm(trained_operation, predict_data)

    return OutputData(
        idx=predict_data.idx,
        features=predict_data.features,
        predict=lstm_forecast,
        task=predict_data.task,
        target=predict_data.target,
        data_type=DataTypesEnum.ts,
    )
def predict(self, trained_operation, predict_data: InputData, is_fit_chain_stage: bool) -> OutputData:
    """
    Predict with the model-specific routine and wrap the result as table data.

    :param trained_operation: fitted operation handed to
        ``_model_specific_predict``
    :param predict_data: InputData to predict on
    :param is_fit_chain_stage: not used by this implementation

    :return: OutputData of type ``DataTypesEnum.table`` with the prediction
    """
    model_output = self._model_specific_predict(trained_operation, predict_data)

    # The prediction is wrapped so that it can serve as features for the next level
    wrapped_fields = dict(idx=predict_data.idx,
                          features=predict_data.features,
                          predict=model_output,
                          task=predict_data.task,
                          target=predict_data.target,
                          data_type=DataTypesEnum.table)
    return OutputData(**wrapped_fields)
def output_dataset():
    """
    Build a synthetic binary classification OutputData.

    A single random feature column of ``samples`` rows is generated and each
    row is labelled 0.0 or 1.0 by thresholding a curve computed from it.

    :return: OutputData with ``samples`` rows in idx, features and predict
    """
    task = Task(TaskTypesEnum.classification)

    samples = 1000
    # One feature column, uniformly drawn from [-5, 5)
    x = 10.0 * np.random.rand(samples, ) - 5.0
    x = np.expand_dims(x, axis=1)

    threshold = 0.5
    # NOTE(review): the exponent is x ** -1 (i.e. 1 / x), not -x, so this is
    # not the standard logistic curve; kept as is to preserve the labels.
    y = 1.0 / (1.0 + np.exp(np.power(x, -1.0)))

    # Vectorised labelling; y is (samples, 1), so the labels keep that shape
    classes = np.where(y <= threshold, 0.0, 1.0)

    # idx is derived from `samples` so that it has one entry per row of
    # features/predict (it used to be hard-coded to 100 entries).
    data = OutputData(idx=np.arange(samples),
                      features=x,
                      predict=classes,
                      task=task,
                      data_type=DataTypesEnum.table)

    return data
def test_get_compound_mask_correct():
    """
    Check that the per-key lists of ``features_mask`` are combined correctly
    into a compound mask.
    """
    features_mask = {'input_ids': [0, 0, 1, 1], 'flow_lens': [1, 1, 0, 0]}
    expected_mask = ('01', '01', '10', '10')

    task = Task(TaskTypesEnum.regression)
    supplementary = SupplementaryData(features_mask=features_mask)
    # The data fields are placeholders; only supplementary_data matters here
    output_example = OutputData(idx=[0, 0],
                                features=[0, 0],
                                predict=[0, 0],
                                task=task,
                                target=[0, 0],
                                data_type=DataTypesEnum.table,
                                supplementary_data=supplementary)

    compound_mask = output_example.supplementary_data.get_compound_mask()

    assert expected_mask == tuple(compound_mask)
def _convert_to_output_function(input_data, transformed_features, data_type=DataTypesEnum.table):
    """
    Prepare the result of an operation as an OutputData object.

    :param input_data: data with features, target and ids to process
    :param transformed_features: transformed features, stored as ``predict``
    :param data_type: type of output data (tabular by default)

    :return: OutputData built from the input fields and transformed features
    """
    output_fields = dict(idx=input_data.idx,
                         features=input_data.features,
                         predict=transformed_features,
                         task=input_data.task,
                         target=input_data.target,
                         data_type=data_type)
    return OutputData(**output_fields)
def generate_outputs():
    """
    Build a simple case with two non-equal outputs in a list.

    :return: tuple ``(list_with_outputs, idx_1, idx_2)`` where the first
        output covers ``idx_1`` and the second covers its subset ``idx_2``
    """
    idx_1 = list(range(12))
    idx_2 = list(range(2, 10))

    task = Task(TaskTypesEnum.regression)
    # Random pools sized by the wider index set; each output slices its own rows
    target_pool = np.random.sample((len(idx_1), 1))
    features_pool = np.random.sample((len(idx_1), 2))

    # Each index set is paired with its own data flow length
    cases = ((idx_1, 1), (idx_2, 0))
    list_with_outputs = [
        OutputData(idx=indices,
                   features=features_pool[indices, :],
                   predict=target_pool[indices, :],
                   task=task,
                   target=target_pool[indices, :],
                   data_type=DataTypesEnum.table,
                   supplementary_data=SupplementaryData(data_flow_length=flow_length))
        for indices, flow_length in cases
    ]

    return list_with_outputs, idx_1, idx_2
def forecast(self, initial_data: InputData, supplementary_data: InputData) -> OutputData:
    """Generate the time series forecast with a sliding window using a pre-fitted chain.

    The forecast is built step by step: each step predicts on the current
    window, the step prediction is appended to the overall forecast, and the
    window is updated before the next step.

    :param initial_data: the initial condition for the forecasting
        (should be greater than or equal to max_window_size)
    :param supplementary_data: the data that should be available during the
        forecast: idx for the forecasted steps and optional exogenous
        variables (variables that are received from an external source
        instead of being forecasted in place, and are used as features of the
        forecasting model to increase the quality of the forecast)

    :return: forecasted time series
    :raises ValueError: if the chain is not fitted yet, or if the task of
        ``supplementary_data`` is not ts_forecasting
    """
    # NOTE(review): the indentation of this method was reconstructed from
    # collapsed source text - confirm the nesting of the loop body below.
    if not self.is_all_cache_actual():
        raise ValueError(
            'Chain for the time series forecasting was not fitted yet.')
    if supplementary_data.task.task_type is not TaskTypesEnum.ts_forecasting:
        raise ValueError(
            'TsForecastingChain can be used for the ts_forecasting task only.'
        )

    forecast_length = supplementary_data.task.task_params.forecast_length

    # NOTE(review): if `copy` is the stdlib shallow copy, `.task` is shared
    # with the caller's objects, so the flag set below is also visible on the
    # original `supplementary_data` / `initial_data` - confirm this is intended.
    supplementary_data_for_forecast = copy(supplementary_data)
    supplementary_data_for_forecast.task.task_params.make_future_prediction = True

    initial_data_for_forecast = copy(initial_data)
    initial_data_for_forecast.task.task_params.make_future_prediction = True

    # check if predict features contains additional (exogenous) variables
    with_exog = supplementary_data_for_forecast.features is not None

    # initial data for the first prediction: the last max_window_size
    # elements of the initial data
    pre_history_start = (
        len(initial_data_for_forecast.idx) -
        initial_data_for_forecast.task.task_params.max_window_size)
    pre_history_end = len(initial_data_for_forecast.idx)

    data_for_forecast = initial_data_for_forecast.subset(
        start=pre_history_start, end=pre_history_end)

    full_prediction = []
    # number of steps needed to cover all requested idx, rounded up
    forecast_steps_num = int(
        np.ceil(
            len(supplementary_data_for_forecast.idx) / forecast_length))
    for forecast_step in range(forecast_steps_num):
        stepwise_prediction = self.predict(data_for_forecast).predict
        if len(stepwise_prediction.shape) > 1:
            # multi-dim prediction: take the last row without its final
            # forecast_length columns
            # NOTE(review): the single-dim branch keeps the LAST
            # forecast_length values, while this slice drops them - confirm.
            stepwise_prediction = stepwise_prediction[
                -1, :-forecast_length]
            full_prediction.extend(stepwise_prediction)
        else:
            # single-dim prediction: keep the last forecast_length values
            stepwise_prediction = list(
                stepwise_prediction[-forecast_length:])
            full_prediction.extend(stepwise_prediction)

        # add additional variable from external source
        if with_exog:
            data_for_forecast = _prepare_exog_features(
                data_for_forecast, supplementary_data_for_forecast,
                stepwise_prediction, forecast_step, forecast_length)
        else:
            # no exogenous variables: the step prediction is appended to the
            # target, which is then reused as features for the next step
            predicted_ts = np.append(data_for_forecast.target,
                                     stepwise_prediction)
            data_for_forecast.target = np.stack(predicted_ts)
            data_for_forecast.features = data_for_forecast.target
            # NOTE(review): assumed to belong to this branch - confirm
            data_for_forecast.idx = _extend_idx_for_prediction(
                data_for_forecast.idx, forecast_length)

    # the rounded-up number of steps may produce extra values: cut them off
    full_prediction = full_prediction[0:len(supplementary_data_for_forecast
                                            .idx)]
    output_data = OutputData(
        idx=supplementary_data_for_forecast.idx,
        features=supplementary_data_for_forecast.features,
        predict=np.asarray(full_prediction),
        task=supplementary_data_for_forecast.task,
        data_type=supplementary_data_for_forecast.data_type)
    return output_data
def output_from_prediction(self, input_data, prediction):
    """
    Wrap a prediction as OutputData using the fields of the input data.

    :param input_data: data whose idx, features, task and data_type are read
    :param prediction: value stored in ``predict`` of the result

    :return: OutputData whose data type is obtained from
        ``self.model.output_datatype`` for the input data type
    """
    # Keys are filled in the same order the fields were originally read
    output_fields = {
        'idx': input_data.idx,
        'features': input_data.features,
        'predict': prediction,
        'task': input_data.task,
        'data_type': self.model.output_datatype(input_data.data_type),
    }
    return OutputData(**output_fields)