def invocations() -> Response:
    request_data = request.data.decode("utf8").strip()
    instances = list(map(json.loads, request_data.splitlines()))

    predictions = []

    # we have to take this as the initial start-time since the first
    # forecast is produced before the loop in predictor.predict
    start = time.time()

    forecast_iter = predictor.predict(
        ListDataset(instances, predictor.freq),
        num_samples=configuration.num_samples,
    )

    for forecast in forecast_iter:
        end = time.time()
        prediction = forecast.as_json_dict(configuration)

        if DEBUG:
            prediction["debug"] = {"timing": end - start}

        predictions.append(prediction)
        start = time.time()

    lines = list(map(json.dumps, map(jsonify_floats, predictions)))
    return Response("\n".join(lines), mimetype="application/jsonlines")

def test_feat_dynamic_real_success():
    params = dict(
        freq="1D", prediction_length=3, prophet_params=dict(n_changepoints=20)
    )

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array([
                    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                ]),
            }
        ],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)

    act_fcst = next(predictor.predict(dataset))
    exp_fcst = np.arange(5.0, 5.0 + params["prediction_length"])

    assert np.all(np.isclose(act_fcst.quantile(0.1), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.5), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.9), exp_fcst, atol=0.02))

def get_dataset():
    data_entry_list = [
        {
            "target": np.c_[
                np.array([0.2, 0.7, 0.2, 0.5, 0.3, 0.3, 0.2, 0.1]),
                np.array([0, 1, 2, 0, 1, 2, 2, 2]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
        {
            "target": np.c_[
                np.array([0.2, 0.1, 0.2, 0.5, 0.4]),
                np.array([0, 1, 2, 1, 1]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
        {
            "target": np.c_[
                np.array([0.2, 0.7, 0.2, 0.5, 0.1, 0.2, 0.1]),
                np.array([0, 1, 2, 0, 1, 0, 2]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
    ]
    return ListDataset(data_entry_list, freq="H", one_dim_target=False)

def test_negative_offset_splitter():
    dataset = ListDataset(
        [
            {"item_id": 0, "start": "2021-03-04", "target": [1.0] * 100},
            {"item_id": 1, "start": "2021-03-04", "target": [2.0] * 50},
        ],
        freq="D",
    )

    split = OffsetSplitter(prediction_length=7, split_offset=-7).split(dataset)

    assert [len(t["target"]) for t in split.train] == [93, 43]
    assert [len(t["target"]) for t in split.test] == [100, 50]

    rolling_split = OffsetSplitter(
        prediction_length=7, split_offset=-21
    ).rolling_split(dataset, windows=3)

    assert [len(t["target"]) for t in rolling_split.train] == [79, 29]
    assert [len(t["target"]) for t in rolling_split.test] == [
        86, 93, 100,
        36, 43, 50,
    ]

def create_list_datasets(self, cut_lengths=[]):
    """Create timeseries for each identifier tuple and each target.

    Args:
        cut_lengths (list, optional): For each cut_length, remove the last
            cut_length time steps of each timeseries. Defaults to an empty list.

    Returns:
        List of gluonts.dataset.common.ListDataset (one per cut length) with
        extra keys for each timeseries.
    """
    multivariate_timeseries_per_cut_length = [[] for _ in cut_lengths]
    if self.timeseries_identifiers_names:
        for identifiers_values, identifiers_df in self.dataframe.groupby(
            self.timeseries_identifiers_names
        ):
            for cut_length_index, cut_length in enumerate(cut_lengths):
                multivariate_timeseries_per_cut_length[
                    cut_length_index
                ] += self._create_gluon_multivariate_timeseries(
                    identifiers_df,
                    cut_length,
                    identifiers_values=identifiers_values,
                )
    else:
        for cut_length_index, cut_length in enumerate(cut_lengths):
            multivariate_timeseries_per_cut_length[
                cut_length_index
            ] += self._create_gluon_multivariate_timeseries(
                self.dataframe, cut_length
            )

    gluon_list_dataset_per_cut_length = []
    for multivariate_timeseries in multivariate_timeseries_per_cut_length:
        gluon_list_dataset_per_cut_length += [
            ListDataset(multivariate_timeseries, freq=self.frequency)
        ]
    return gluon_list_dataset_per_cut_length

def get_dataset(**kw):
    # check whether the train or the test split is requested
    data_path = kw['train_data_path'] if kw['train'] else kw['test_data_path']

    # read from a pickle or CSV file
    if kw.get("uri_type") == "pickle":
        data_set = pd.read_pickle(data_path)
    else:
        data_set = pd.read_csv(data_path)

    # convert to the gluonts format
    gluonts_ds = ListDataset(
        [
            {
                FieldName.TARGET: data_set.iloc[i].values,
                FieldName.START: kw['start'],
            }
            for i in range(kw['num_series'])
        ],
        freq=kw['freq'],
    )

    if VERBOSE:
        entry = next(iter(gluonts_ds))
        train_series = to_pandas(entry)
        train_series.plot()
        save_fig = kw['save_fig']
        plt.savefig(save_fig)

    return gluonts_ds

def load(self, frequency: str, subset_filter: str, training: bool) -> ListDataset:
    """
    Load the electricity dataset.

    :param frequency: pandas frequency string for the resulting dataset (e.g. "H").
    :param subset_filter: dates as "from_date:to_date" in "YYYY-mm-dd H" format.
    :param training: If False then to_date will be extended to 7 days in the future.
    :return: ListDataset with one entry per series over the requested date range.
    """
    dates = subset_filter.split(':')
    from_date = pd.to_datetime(dates[0])
    to_date = pd.to_datetime(dates[1])
    if not training:
        to_date = to_date + relativedelta(hours=24 * 7)
    items_all = [
        {
            'item_id': i,
            'start': from_date,
            'horizon': 24,
            'target': values,
        }
        for i, values in enumerate(
            self.values[:, self._dates_to_index(from_date, to_date)]
        )
    ]
    return ListDataset(items_all, freq=frequency)

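A hedged usage sketch for the loader above; `loader` stands in for an instance of whatever class defines this `load` method, and the date range is made up:

train_ds = loader.load(frequency="H", subset_filter="2014-01-01 00:2014-09-01 00", training=True)
test_ds = loader.load(frequency="H", subset_filter="2014-01-01 00:2014-09-01 00", training=False)  # to_date extended by 7 days
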
def test_parallelized_predictor():
    dataset = ListDataset(
        data_iter=[
            {"start": "2012-01-01", "target": (np.zeros(20) + i).tolist()}
            for i in range(300)
        ],
        freq="1H",
    )

    base_predictor = IdentityPredictor(
        freq="1H", prediction_length=10, num_samples=100
    )

    predictor = ParallelizedPredictor(
        base_predictor=base_predictor, num_workers=10, chunk_size=2
    )

    predictions = list(base_predictor.predict(dataset))
    parallel_predictions = list(predictor.predict(dataset))

    assert len(predictions) == len(parallel_predictions)

    for p, pp in zip(predictions, parallel_predictions):
        assert np.all(p.samples == pp.samples)
        assert np.all(p.index == pp.index)

def transform_fn(net, data, input_content_type, output_content_type):
    try:
        data = json.loads(data)

        # how many time series are included?
        N = len(data["value"])

        # create the dataset
        test_data = ListDataset(
            [
                {
                    "start": datetime.strptime(data["index"], "%Y-%m-%d %H:%M:%S"),
                    "target": np.array(data["value"][n]),
                }
                for n in range(N)
            ],
            freq=data["freq"],
        )

        # prediction
        forecast_it = net.predict(test_data)
        forecasts = list(forecast_it)

        result = []
        for n in range(N):
            result.append(forecasts[n].samples.tolist())

        response_body = json.dumps(result)
        return response_body, output_content_type
    except Exception as e:
        print(e)
        return json.dumps(str(e)), output_content_type

def transform_fn(
    model,
    request_body,
    content_type='application/json',
    accept_type='application/json',
):
    data = json.loads(request_body)
    target_test_df = pd.DataFrame(data['value'], index=data['timestamp'])

    target = target_test_df.values
    num_series = target_test_df.shape[1]
    start_dt = target_test_df.index[0]

    test_lst = []
    for i in range(0, num_series):
        target_vec = target[:, i]
        dic = {FieldName.TARGET: target_vec, FieldName.START: start_dt}
        test_lst.append(dic)

    test_ds = ListDataset(test_lst, freq='1D')

    response_body = {}
    forecast_it = model.predict(test_ds)
    for idx, f in enumerate(forecast_it):
        response_body[f'item_{idx}'] = f.samples.mean(axis=0).tolist()

    return json.dumps(response_body)

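For reference, a sketch of a request body matching the fields this `transform_fn` reads; the timestamps and values are made up:

import json

request_body = json.dumps({
    "timestamp": ["2020-01-01", "2020-01-02", "2020-01-03"],
    "value": [[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]],  # 3 rows x 2 series
})
# pd.DataFrame(data['value'], index=data['timestamp']) turns this into one
# column per series, so this payload asks for forecasts of two daily series.
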
def test_minute_frequency():
    prediction_length = 1
    timeseries = {
        TIMESERIES_KEYS.START: "2021-01-15 12:40:00",
        TIMESERIES_KEYS.TARGET: np.array([12, 13]),
        TIMESERIES_KEYS.TARGET_NAME: "target",
        TIMESERIES_KEYS.TIME_COLUMN_NAME: "date",
    }
    frequency = "20min"
    gluon_dataset = ListDataset([timeseries], freq=frequency)

    model = Model(
        "simplefeedforward",
        model_parameters={"activated": True, "kwargs": {}},
        frequency=frequency,
        prediction_length=prediction_length,
        epoch=1,
        batch_size=8,
        num_batches_per_epoch=5,
    )
    evaluation_forecasts_df = model.train_evaluate(
        gluon_dataset, gluon_dataset, make_forecasts=True, retrain=True
    )[2]
    assert evaluation_forecasts_df["index"].iloc[0] == pd.Timestamp("2021-01-15 13:00:00")

    trained_model = TrainedModel(
        predictor=model.predictor,
        gluon_dataset=gluon_dataset,
        prediction_length=prediction_length,
        quantiles=[0.5],
        include_history=True,
    )
    trained_model.predict()
    forecasts_df = trained_model.get_forecasts_df(session="2021-01-01", model_label="TEST")
    assert forecasts_df["date"].iloc[0] == pd.Timestamp("2021-01-15 13:20:00")

def test_feat_dynamic_real_bad_size():
    params = dict(freq="1D", prediction_length=3, prophet_params={})

    dataset = ListDataset(
        data_iter=[
            {
                "start": "2017-01-01",
                "target": np.array([1.0, 2.0, 3.0, 4.0]),
                "feat_dynamic_real": np.array([
                    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                ]),
            }
        ],
        freq=params["freq"],
    )

    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6"
    )

def initialize_model() -> nn.HybridBlock:
    # dummy training data
    N = 10  # number of time series
    T = 100  # number of timesteps
    prediction_length = 24
    freq = "1H"
    custom_dataset = np.zeros(shape=(N, T))

    start = pd.Timestamp("01-01-2019", freq=freq)  # can be different for each time series
    train_ds = ListDataset(
        [
            {"target": x, "start": start}
            for x in custom_dataset[:, :-prediction_length]
        ],
        freq=freq,
    )

    # create a simple model
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=T,
        freq=freq,
        trainer=Trainer(
            ctx="cpu",
            epochs=1,
            learning_rate=1e-3,
            num_batches_per_epoch=1,
        ),
    )

    # train model
    predictor = estimator.train(train_ds)

    return predictor.prediction_net

def test_train_loader_goes_over_all_data(num_workers) -> None:
    batch_size = 4
    num_batches_per_epoch = 4

    num_time_series = batch_size * num_batches_per_epoch * 3

    num_passes = 5
    num_epochs = num_passes * 3

    simple_data = [
        {
            "start": "2012-01-01",
            "target": np.random.uniform(size=40).astype(float).tolist(),
            "item_id": i,
        }
        for i in range(num_time_series)
    ]

    def test_dataset(dataset):
        class ExactlyOneSampler(InstanceSampler):
            def __call__(self, ts: np.ndarray, a: int, b: int) -> np.ndarray:
                window_size = b - a + 1
                assert window_size > 0
                return np.array([a])

        transformation = InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=ExactlyOneSampler(),
            past_length=10,
            future_length=5,
            dummy_value=1.0,
        )

        dl = TrainDataLoader(
            dataset=dataset,
            transform=transformation,
            batch_size=batch_size,
            stack_fn=partial(batchify, ctx=current_context()),
            num_workers=num_workers,
        )

        item_ids = defaultdict(int)

        for epoch in range(num_epochs):
            for batch in islice(dl, num_batches_per_epoch):
                for item_id in batch["item_id"]:
                    item_ids[item_id] += 1

        for i in range(len(dataset)):
            assert num_passes - 1 <= item_ids[i] <= num_passes + 1

    test_dataset(ListDataset(simple_data, freq="1H"))

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(tmpdir + "/data.json", "w") as f:
            for data in simple_data:
                json.dump(data, f)
                f.write("\n")

        test_dataset(FileDataset(Path(tmpdir), freq="1H"))
        test_dataset(FileDataset(Path(tmpdir), freq="1H", cache=True))

def test_agg_lags(pred_length, rolling_lags):
    # create dummy dataset
    target = np.array([1, 1, 1, 2, 2, 3, 3, 4, 5, 6])
    start = pd.Timestamp("01-01-2019 01:00:00", freq="1H")
    freq = "1H"
    ds = ListDataset(
        [{FieldName.TARGET: target, FieldName.START: start}], freq=freq
    )

    # 2H aggregate lags
    lags_2H = [1, 2, 3, 4, 6]

    add_agg_lags = AddAggregateLags(
        target_field=FieldName.TARGET,
        output_field="lags_2H",
        pred_length=pred_length,
        base_freq=freq,
        agg_freq="2H",
        agg_lags=lags_2H,
        rolling_agg=rolling_lags,
    )
    assert add_agg_lags.ratio == 2

    train_entry = next(add_agg_lags(iter(ds), is_train=True))
    test_entry = next(add_agg_lags(iter(ds), is_train=False))

    if rolling_lags:
        assert (
            add_agg_lags.valid_lags
            == valid_lags_rolling[f"prediction_length_{pred_length}"]
        )
        assert np.allclose(
            train_entry["lags_2H"],
            expected_lags_rolling[f"prediction_length_{pred_length}"]["train"],
        )
        assert np.allclose(
            test_entry["lags_2H"],
            expected_lags_rolling[f"prediction_length_{pred_length}"]["test"],
        )
    else:
        assert (
            add_agg_lags.valid_lags
            == valid_lags_calendar[f"prediction_length_{pred_length}"]
        )
        assert np.allclose(
            train_entry["lags_2H"],
            expected_lags_calendar[f"prediction_length_{pred_length}"]["train"],
        )
        assert np.allclose(
            test_entry["lags_2H"],
            expected_lags_calendar[f"prediction_length_{pred_length}"]["test"],
        )

def log1p_tds(dataset: TrainDatasets) -> TrainDatasets:
    """Create a new train datasets with targets log-transformed."""
    # Implementation note: currently, the only way is to eagerly load all
    # timeseries in memory, and do the transform.
    train = ListDataset(dataset.train, freq=dataset.metadata.freq)
    log1p(train)

    if dataset.test is not None:
        test = ListDataset(dataset.test, freq=dataset.metadata.freq)
        log1p(test)
    else:
        test = None

    # fmt: off
    return TrainDatasets(
        dataset.metadata.copy(),  # Note: pydantic's deep copy.
        train=train,
        test=test)

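The `log1p` helper is not shown above; a minimal sketch of what it could look like, assuming it mutates the eagerly loaded entries in place via the `list_data` attribute (present on older `ListDataset` versions):

import numpy as np

def log1p(list_dataset: ListDataset) -> None:
    # replace each entry's target with log(1 + x), in place
    for entry in list_dataset.list_data:
        entry["target"] = np.log1p(np.asarray(entry["target"], dtype=float))
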
def test_training_bad_seasonality_user_input(self):
    prediction_length = 1
    frequency = "3M"
    gluon_dataset = ListDataset(self.timeseries, freq=frequency)
    with pytest.raises(ValueError):
        estimator = AutoARIMAEstimator(
            prediction_length=prediction_length, freq=frequency, m=12
        )

def get_dataset_and_transformation():
    # don't recompute, since it's expensive
    global _data_cache
    if _data_cache is not None:
        return _data_cache

    # create a constant dataset with each time series having
    # variable length and unique constant integer entries
    dataset = ConstantDataset(
        num_steps=CD_NUM_STEPS, num_timeseries=CD_NUM_TIME_SERIES
    )
    list_dataset = list(dataset.train)
    for i, ts in enumerate(list_dataset):
        ts["start"] = pd.Timestamp(ts_input=ts["start"], freq=dataset.freq)
        # get randomness in the ts lengths
        ts["target"] = np.array(
            ts["target"] * random.randint(1, CD_MAX_LEN_MULTIPLICATION_FACTOR)
        )
    list_dataset = ListDataset(data_iter=list_dataset, freq=dataset.freq)
    list_dataset_pred_length = dataset.prediction_length

    # use every possible time point to split the time series
    transformation = Chain(
        [
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=UniformSplitSampler(
                    p=SPLITTING_SAMPLE_PROBABILITY  # THIS IS IMPORTANT FOR THE TEST
                ),
                past_length=CONTEXT_LEN,
                future_length=list_dataset_pred_length,
                dummy_value=1.0,
            ),
        ]
    )

    # reference validation dataset, processed without multiprocessing
    train_data_transformed_original = list(
        ValidationDataLoader(
            dataset=list_dataset,
            transform=transformation,
            batch_size=BATCH_SIZE,
            num_workers=0,  # This is the crucial difference
            ctx=current_context(),
        )
    )

    _data_cache = (
        list_dataset,
        transformation,
        list_dataset_pred_length,
        train_data_transformed_original,
    )
    return _data_cache

def make_dummy_datasets_with_features(
    num_ts: int = 5,
    start: str = "2018-01-01",
    freq: str = "D",
    min_length: int = 5,
    max_length: int = 10,
    prediction_length: int = 3,
    cardinality: List[int] = [],
    num_feat_dynamic_real: int = 0,
) -> Tuple[ListDataset, ListDataset]:
    data_iter_train = []
    data_iter_test = []

    for k in range(num_ts):
        ts_length = randint(min_length, max_length)
        perc_zeros = 0.5
        mask = np.random.rand(ts_length) < perc_zeros
        target = np.array([1.0] * ts_length)
        target[mask] = 0

        data_entry_train = {
            FieldName.START: start,
            FieldName.TARGET: target,
        }
        if len(cardinality) > 0:
            data_entry_train[FieldName.FEAT_STATIC_CAT] = [
                randint(0, c) for c in cardinality
            ]
        data_entry_test = data_entry_train.copy()
        if num_feat_dynamic_real > 0:
            data_entry_train[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + k)] * ts_length
                for k in range(num_feat_dynamic_real)
            ]
            data_entry_test[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + k)] * (ts_length + prediction_length)
                for k in range(num_feat_dynamic_real)
            ]
        data_iter_train.append(data_entry_train)
        data_iter_test.append(data_entry_test)

    return (
        ListDataset(data_iter=data_iter_train, freq=freq),
        ListDataset(data_iter=data_iter_test, freq=freq),
    )

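Example call, producing aligned train/test datasets with two static categorical features and one dynamic real feature (the test entries share the short targets; only their dynamic features are extended by prediction_length):

train_ds, test_ds = make_dummy_datasets_with_features(
    num_ts=3, cardinality=[3, 2], num_feat_dynamic_real=1
)
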
def gluonts_dataframe(df):
    freqed = pd.infer_freq(df.index)
    if freqed == "MS":
        freq = "M"
        # start = df.index[0] + relativedelta(months=1)
    else:
        freq = freqed
    dataset = ListDataset([{"start": df.index[0], "target": df.values}], freq=freq)
    return dataset

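A quick check of the "MS" special case above, using a made-up monthly series (a pandas Series works here, since only .index and .values are used):

import numpy as np
import pandas as pd

monthly = pd.Series(
    np.arange(24.0),
    index=pd.date_range("2020-01-01", periods=24, freq="MS"),
)
ds = gluonts_dataframe(monthly)  # inferred freq "MS" is mapped to "M"
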
def test_multivariate_grouper_test(
    univariate_ts, multivariate_ts, test_fill_rule, max_target_dim
) -> None:
    univariate_ds = ListDataset(univariate_ts, freq="1D")
    multivariate_ds = ListDataset(multivariate_ts, freq="1D", one_dim_target=False)

    grouper = MultivariateGrouper(
        test_fill_rule=test_fill_rule,
        num_test_dates=2,
        max_target_dim=max_target_dim,
    )

    for grouped_data, multivariate_data in zip(
        grouper(univariate_ds), multivariate_ds
    ):
        assert (grouped_data["target"] == multivariate_data["target"]).all()
        assert grouped_data["start"] == multivariate_data["start"]

def _test_nans_in_target(predictor: NPTSPredictor, dataset: Dataset) -> None:
    """
    Test that the model behaves as expected when the target time series
    contains NaN values.

    Parameters
    ----------
    predictor
        the predictor instance to test
    dataset
        a dataset (with targets without NaNs) to use as a base for the test
    """
    # a copy of dataset with 90% of the target entries NaNs
    ds_090pct_nans = ListDataset(
        data_iter=[
            _inject_nans_in_target(data_entry, p=0.9) for data_entry in dataset
        ],
        freq=predictor.freq,
    )

    # a copy of dataset with 100% of the target entries NaNs
    ds_100pct_nans = ListDataset(
        data_iter=[
            _inject_nans_in_target(data_entry, p=1.0) for data_entry in dataset
        ],
        freq=predictor.freq,
    )

    # assert that we can tolerate a high percentage of NaNs
    for forecast in predictor.predict(ds_090pct_nans):
        assert np.all(np.isfinite(forecast.samples)), "Forecast contains NaNs."

    # assert that an exception is thrown if 100% of the values are NaN
    with pytest.raises(GluonTSDataError) as excinfo:
        for _ in predictor.predict(ds_100pct_nans):
            pass

    assert (
        f"The last {predictor.context_length} positions of the target time "
        f"series are all NaN. Please increase the `context_length` "
        f"parameter of your NPTS model so the last "
        f"{predictor.context_length} positions of each target contain at "
        f"least one non-NaN value."
    ) in str(excinfo.value)

def make_dataset(N, train_length):
    # generates 2 ** N - 1 time series with constant increasing values
    n = 2 ** N - 1
    targets = np.arange(n * train_length).reshape((n, train_length))
    return ListDataset(
        [{"start": "2012-01-01", "target": targets[i, :]} for i in range(n)],
        freq="D",
    )

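For example, N=3 yields 2 ** 3 - 1 = 7 series of length 12, with targets 0..11, 12..23, ..., 72..83:

ds = make_dataset(N=3, train_length=12)
assert len(ds) == 7
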
def predict(self, dataset: Dataset, **kwargs) -> Iterator[Forecast]:
    logger = logging.getLogger(__name__)

    for i, ts in enumerate(dataset, start=1):
        logger.info(f"training for time series {i} / {len(dataset)}")
        local_ds = ListDataset([ts], freq=self.freq)
        trained_pred = self.estimator.train(local_ds)

        logger.info(f"predicting for time series {i} / {len(dataset)}")
        predictions = trained_pred.predict(local_ds, **kwargs)
        for pred in predictions:
            yield pred

def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        time_granularity='1H',
        feat_static_cat=[
            CategoricalFeatureInfo(name='feat_static_cat_000', cardinality='10')
        ],
        feat_static_real=[BasicFeatureInfo(name='feat_static_real_000')],
    )

    start_date = '2000-01-01 00:00:00'

    train_ds = ListDataset(
        data_iter=[
            {
                'item': str(i),
                'start': start_date,
                'target': [float(i)] * 24,
                'feat_static_cat': [i],
                'feat_static_real': [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.time_granularity,
    )

    test_ds = ListDataset(
        data_iter=[
            {
                'item': str(i),
                'start': start_date,
                'target': [float(i)] * 30,
                'feat_static_cat': [i],
                'feat_static_real': [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.time_granularity,
    )

    info = DatasetInfo(
        name='constant_dataset',
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds

def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(name="feat_static_cat_000", cardinality="10")
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    start_date = "2000-01-01 00:00:00"

    train_ds = ListDataset(
        data_iter=[
            {
                FieldName.ITEM_ID: str(i),
                FieldName.START: start_date,
                FieldName.TARGET: [float(i)] * 24,
                FieldName.FEAT_STATIC_CAT: [i],
                FieldName.FEAT_STATIC_REAL: [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[
            {
                FieldName.ITEM_ID: str(i),
                FieldName.START: start_date,
                FieldName.TARGET: [float(i)] * 30,
                FieldName.FEAT_STATIC_CAT: [i],
                FieldName.FEAT_STATIC_REAL: [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds

def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(name="feat_static_cat_000", cardinality="10")
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    start_date = "2000-01-01 00:00:00"

    train_ds = ListDataset(
        data_iter=[
            {
                "item_id": str(i),
                "start": start_date,
                "target": [float(i)] * 24,
                "feat_static_cat": [i],
                "feat_static_real": [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[
            {
                "item_id": str(i),
                "start": start_date,
                "target": [float(i)] * 30,
                "feat_static_cat": [i],
                "feat_static_real": [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds

def invocations() -> Response:
    request_data = request.data.decode("utf8").strip()

    # request_data can be empty, but .split("\n") will still produce a
    # non-empty list, which then means we try to decode an empty string,
    # which causes an error: `''.split("\n") == ['']`
    if request_data:
        instances = list(map(json.loads, request_data.split("\n")))
    else:
        instances = []

    dataset = ListDataset(instances, predictor.freq)

    start_time = time.time()

    if settings.gluonts_batch_timeout > 0:
        predictions = with_timeout(
            make_predictions,
            args=(predictor, dataset, configuration),
            timeout=settings.gluonts_batch_timeout,
        )

        # predictions is None when the predictor timed out
        if predictions is None:
            logger.warning(f"predictor timed out for: {request_data}")

            FallbackPredictor = forecaster_type_by_name(
                settings.gluonts_batch_fallback_predictor
            )
            fallback_predictor = FallbackPredictor(
                freq=predictor.freq,
                prediction_length=predictor.prediction_length,
            )

            predictions = make_predictions(
                fallback_predictor, dataset, configuration
            )
    else:
        predictions = make_predictions(predictor, dataset, configuration)

    end_time = time.time()

    scored_instances.append(
        ScoredInstanceStat(
            amount=len(predictions), duration=end_time - start_time
        )
    )
    log_scored(when=end_time)

    for forward_field in settings.gluonts_forward_fields:
        for input_item, prediction in zip(dataset, predictions):
            prediction[forward_field] = input_item.get(forward_field)

    lines = list(map(json.dumps, map(jsonify_floats, predictions)))
    return Response("\n".join(lines), mimetype="application/jsonlines")

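A hedged client-side sketch for exercising this endpoint; the host, port, and route here are assumptions, not taken from the source:

import json
import requests

# one JSON instance per line, as the endpoint expects
payload = "\n".join(
    json.dumps({"start": "2021-01-01 00:00:00", "target": [1.0] * 30})
    for _ in range(2)
)
response = requests.post("http://localhost:8080/invocations", data=payload)
for line in response.text.splitlines():
    print(json.loads(line))  # one forecast per input line
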
def _prepare_train_data(self, dataset: Dataset) -> ListDataset:
    logging.info("grouping training time series into a single dataset")
    grouped_data = self._transform_target(self._align_data_entry, dataset)
    grouped_data = self._restrict_max_dimensionality(grouped_data)
    grouped_data[FieldName.START] = self.first_timestamp
    grouped_data[FieldName.FEAT_STATIC_CAT] = [0]
    return ListDataset([grouped_data], freq=self.frequency, one_dim_target=False)

def trans_df2gluon(dflist, freq="1H"):
    """Convert a list of dataframes with "ds" (timestamp) and "y" (target)
    columns into a gluonts ListDataset, resampling each to `freq`."""
    datadictlist = []
    for data in dflist:
        rawdata = data.copy()
        rawdata['ds'] = pd.to_datetime(rawdata['ds'])
        rawdata.set_index('ds', inplace=True)
        rawdata = rawdata.resample(freq).mean()
        data_dict = {"start": rawdata.index[0], "target": rawdata.y}
        datadictlist.append(data_dict)
    dataset = ListDataset(datadictlist, freq=freq)
    return dataset

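Example usage with two made-up frames carrying the expected "ds" and "y" columns:

import numpy as np
import pandas as pd

frames = [
    pd.DataFrame({
        "ds": pd.date_range("2021-01-01", periods=48, freq="H"),
        "y": np.random.rand(48),
    })
    for _ in range(2)
]
ds = trans_df2gluon(frames, freq="1H")
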