def test_parallelized_predictor():
    """
    Checks that a ``ParallelizedPredictor`` yields the same forecasts, in the
    same order, as the predictor it wraps.
    """
    # 300 constant series of length 20; series ``offset`` has the value ``offset``
    # everywhere, so every series produces a distinguishable forecast.
    series = [
        {
            "start": "2012-01-01",
            "target": (np.zeros(20) + offset).tolist(),
        }
        for offset in range(300)
    ]
    dataset = ListDataset(data_iter=series, freq="1H")

    sequential = IdentityPredictor(
        freq="1H", prediction_length=10, num_samples=100
    )
    parallel = ParallelizedPredictor(
        base_predictor=sequential, num_workers=10, chunk_size=2
    )

    expected = list(sequential.predict(dataset))
    actual = list(parallel.predict(dataset))

    assert len(expected) == len(actual)

    # Pairwise comparison also verifies that the ordering is preserved.
    for want, got in zip(expected, actual):
        assert np.all(want.samples == got.samples)
        assert np.all(want.index == got.index)
def generate_forecasts(
    predictor: Predictor,
    dataset: Dataset,
    num_samples: int = 100,
    parallelize: bool = False,
) -> Tuple[QuantileForecasts, float]:
    """
    Generates the predictions of the given predictor for the provided dataset.
    The returned prediction object provides the forecasts along with some metadata.

    Every forecast is normalized to a quantile forecast for the quantiles
    "0.1", "0.2", ..., "0.9". Forecasts without an item ID are assigned their
    position in the iteration order as ID.

    Args:
        predictor: The predictor which is used to make forecasts.
        dataset: The GluonTS dataset which is used for testing.
        num_samples: The number of samples to use for making predictions.
        parallelize: Whether predictions ought to be parallelized.

    Returns:
        The forecasts for the dataset.
        The average latency for generating a single forecast. The measured time
            includes the conversion to quantile forecasts. When parallelizing, the
            value is scaled by half the CPU count to approximate the latency of a
            single worker.

    Raises:
        ValueError: If no forecast was produced, i.e. the dataset is empty.
    """
    if parallelize:
        predictor = ParallelizedPredictor(predictor, num_workers=os.cpu_count())

    # First, perform the predictions. A monotonic clock is used such that system
    # clock adjustments cannot distort the measured duration.
    tic = time.perf_counter()
    forecast_pred, _ = make_evaluation_predictions(dataset, predictor, num_samples)

    # ...and compute the quantiles
    quantiles = [f"0.{i+1}" for i in range(9)]
    forecasts = []
    for i, forecast in tqdm(
        enumerate(forecast_pred),
        total=maybe_len(dataset),
        disable=not env.use_tqdm,
    ):
        result = None
        if isinstance(forecast, QuantileForecast):
            # Only reuse the forecast if it provides exactly the desired quantiles
            if forecast.forecast_keys == quantiles:
                result = forecast
        elif isinstance(forecast, SampleForecast):
            result = forecast.to_quantile_forecast(quantiles)  # type: ignore

        if result is None:
            # If none of the above checks added a quantile forecast, we resort to a
            # method that should work on all types of forecasts
            result = QuantileForecast(
                forecast_arrays=np.stack(
                    [forecast.quantile(q) for q in quantiles], axis=0
                ),
                start_date=forecast.start_date,
                freq=forecast.freq,
                forecast_keys=quantiles,
                item_id=forecast.item_id,
            )

        if result.item_id is None:
            result.item_id = i
        forecasts.append(result)

    toc = time.perf_counter()

    if not forecasts:
        raise ValueError(
            "No forecasts were generated: the provided dataset is empty."
        )

    # Then, we compute the prediction latency. We divide by the number of generated
    # forecasts rather than `len(dataset)` since datasets need not define `__len__`.
    latency = (toc - tic) / len(forecasts)
    if parallelize:
        # We observed that N CPUs only brought a speedup of ~N/2
        latency = latency * (cast(int, os.cpu_count()) / 2)

    # And convert the list of forecasts into a QuantileForecasts object
    quantile_forecasts = QuantileForecasts(
        values=np.stack([f.forecast_array for f in forecasts]),
        start_dates=np.array([f.start_date for f in forecasts]),
        item_ids=np.array([str(f.item_id) for f in forecasts]),
        freq=to_offset(forecasts[0].freq),  # type: ignore
        quantiles=forecasts[0].forecast_keys,
    )
    return quantile_forecasts, latency