Example #1
File: app.py Project: steverab/gluon-ts
    def invocations() -> Response:
        request_data = request.data.decode("utf8").strip()
        instances = list(map(json.loads, request_data.splitlines()))
        predictions = []

        # we have to take this as the initial start time, since the first
        # forecast is already produced inside predictor.predict, before the loop below
        start = time.time()

        forecast_iter = predictor.predict(
            ListDataset(instances, predictor.freq),
            num_samples=configuration.num_samples,
        )

        for forecast in forecast_iter:
            end = time.time()
            prediction = forecast.as_json_dict(configuration)

            if DEBUG:
                prediction["debug"] = {"timing": end - start}

            predictions.append(prediction)

            start = time.time()

        lines = list(map(json.dumps, map(jsonify_floats, predictions)))
        return Response("\n".join(lines), mimetype="application/jsonlines")
Example #2
def test_feat_dynamic_real_success():
    params = dict(freq="1D",
                  prediction_length=3,
                  prophet_params=dict(n_changepoints=20))

    dataset = ListDataset(
        data_iter=[{
            "start": "2017-01-01",
            "target": np.array([1.0, 2.0, 3.0, 4.0]),
            "feat_dynamic_real": np.array([
                [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
            ]),
        }],
        freq=params["freq"],
    )

    predictor = ProphetPredictor(**params)

    act_fcst = next(predictor.predict(dataset))
    exp_fcst = np.arange(5.0, 5.0 + params["prediction_length"])
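    # the target grows by 1.0 per day, so the next 3 forecast values are expected to be close to [5, 6, 7]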

    assert np.all(np.isclose(act_fcst.quantile(0.1), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.5), exp_fcst, atol=0.02))
    assert np.all(np.isclose(act_fcst.quantile(0.9), exp_fcst, atol=0.02))
Example #3
def get_dataset():

    data_entry_list = [
        {
            "target": np.c_[
                np.array([0.2, 0.7, 0.2, 0.5, 0.3, 0.3, 0.2, 0.1]),
                np.array([0, 1, 2, 0, 1, 2, 2, 2]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
        {
            "target": np.c_[
                np.array([0.2, 0.1, 0.2, 0.5, 0.4]),
                np.array([0, 1, 2, 1, 1]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
        {
            "target": np.c_[
                np.array([0.2, 0.7, 0.2, 0.5, 0.1, 0.2, 0.1]),
                np.array([0, 1, 2, 0, 1, 0, 2]),
            ].T,
            "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"),
            "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"),
        },
    ]
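    # each target is a 2 x T array: a real-valued series stacked with an integer-coded series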

    return ListDataset(data_entry_list, freq="H", one_dim_target=False)
Example #4
def test_negative_offset_splitter():
    dataset = ListDataset(
        [
            {"item_id": 0, "start": "2021-03-04", "target": [1.0] * 100},
            {"item_id": 1, "start": "2021-03-04", "target": [2.0] * 50},
        ],
        freq="D",
    )

    split = OffsetSplitter(prediction_length=7, split_offset=-7).split(dataset)
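    # split_offset=-7 holds out the last 7 points, so the training targets shrink to 100 - 7 = 93 and 50 - 7 = 43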

    assert [len(t["target"]) for t in split.train] == [93, 43]
    assert [len(t["target"]) for t in split.test] == [100, 50]

    rolling_split = OffsetSplitter(
        prediction_length=7, split_offset=-21
    ).rolling_split(dataset, windows=3)

    assert [len(t["target"]) for t in rolling_split.train] == [79, 29]
    assert [len(t["target"]) for t in rolling_split.test] == [
        86,
        93,
        100,
        36,
        43,
        50,
    ]
Example #5
    def create_list_datasets(self, cut_lengths=[]):
        """Create timeseries for each identifier tuple and each target.

        Args:
            cut_lengths (list of int, optional): For each cut_length, remove the last cut_length time steps of each timeseries. Defaults to an empty list.

        Returns:
            List of gluonts.dataset.common.ListDataset with extra keys for each timeseries
        """
        multivariate_timeseries_per_cut_length = [
            [] for cut_length in cut_lengths
        ]
        if self.timeseries_identifiers_names:
            for identifiers_values, identifiers_df in self.dataframe.groupby(
                    self.timeseries_identifiers_names):
                for cut_length_index, cut_length in enumerate(cut_lengths):
                    multivariate_timeseries_per_cut_length[
                        cut_length_index] += self._create_gluon_multivariate_timeseries(
                            identifiers_df,
                            cut_length,
                            identifiers_values=identifiers_values)
        else:
            for cut_length_index, cut_length in enumerate(cut_lengths):
                multivariate_timeseries_per_cut_length[
                    cut_length_index] += self._create_gluon_multivariate_timeseries(
                        self.dataframe, cut_length)
        gluon_list_dataset_per_cut_length = []
        for multivariate_timeseries in multivariate_timeseries_per_cut_length:
            gluon_list_dataset_per_cut_length += [
                ListDataset(multivariate_timeseries, freq=self.frequency)
            ]
        return gluon_list_dataset_per_cut_length
Example #6
def get_dataset(**kw):
    # check whether the dataset is the train or the test split
    data_path = kw['train_data_path'] if kw['train'] else kw['test_data_path']

    # read the data from a pickle or csv file
    if kw.get("uri_type") == "pickle":
        data_set = pd.read_pickle(data_path)
    else:
        data_set = pd.read_csv(data_path)

    # convert to gluonts format
    gluonts_ds = ListDataset(
        [{
            FieldName.TARGET: data_set.iloc[i].values,
            FieldName.START: kw['start'],
        } for i in range(kw['num_series'])],
        freq=kw['freq'],
    )

    if VERBOSE:
        entry = next(iter(gluonts_ds))
        train_series = to_pandas(entry)
        train_series.plot()
        save_fig = kw['save_fig']
        plt.savefig(save_fig)

    return gluonts_ds
Example #7
    def load(self, frequency: str, subset_filter: str,
             training: bool) -> ListDataset:
        """
        Load electricity dataset.

        :param frequency: pandas frequency string for the resulting ListDataset.
        :param subset_filter: dates as "from_date:to_date" in "YYYY-mm-dd H" format.
        :param training: If False, to_date is extended by 7 days into the future.
        :return: ListDataset with one entry per series in the selected date range.
        """
        dates = subset_filter.split(':')
        from_date = pd.to_datetime(dates[0])
        to_date = pd.to_datetime(dates[1])
        if not training:
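            # extend the range by 7 * 24 hourly steps so the test window covers one extra week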
            to_date = to_date + relativedelta(hours=24 * 7)

        items_all = [{
            'item_id': i,
            'start': from_date,
            'horizon': 24,
            'target': values
        } for i, values in enumerate(
            self.values[:, self._dates_to_index(from_date, to_date)])]

        return ListDataset(items_all, freq=frequency)
Example #8
def test_parallelized_predictor():
    dataset = ListDataset(
        data_iter=[
            {"start": "2012-01-01", "target": (np.zeros(20) + i).tolist()}
            for i in range(300)
        ],
        freq="1H",
    )

    base_predictor = IdentityPredictor(
        freq="1H", prediction_length=10, num_samples=100
    )

    predictor = ParallelizedPredictor(
        base_predictor=base_predictor, num_workers=10, chunk_size=2
    )
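    # the parallelized predictor must yield exactly the same forecasts as the base predictor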

    predictions = list(base_predictor.predict(dataset))
    parallel_predictions = list(predictor.predict(dataset))

    assert len(predictions) == len(parallel_predictions)

    for p, pp in zip(predictions, parallel_predictions):
        assert np.all(p.samples == pp.samples)
        assert np.all(p.index == pp.index)
Example #9
def transform_fn(net, data, input_content_type, output_content_type):
    
    try:
        data = json.loads(data) 
        
        # How many time series are included?
        N = len(data["value"])

        # Create the dataset
        test_data = ListDataset(
            [{"start": datetime.strptime(data["index"], "%Y-%m-%d %H:%M:%S"),
              "target": np.array(data["value"][n])} for n in range(N)],
            freq=data["freq"],
        )
        
        # prediction
        forecast_it = net.predict(test_data)
        forecasts = list(forecast_it)
        
        result = []
        for n in range(N):
            result.append(forecasts[n].samples.tolist())
        response_body = json.dumps(result)
        return response_body, output_content_type
    
    except Exception as e:
        print(e)
        return json.dumps(str(e)), output_content_type
Example #10
def transform_fn(model,
                 request_body,
                 content_type='application/json',
                 accept_type='application/json'):

    data = json.loads(request_body)
    target_test_df = pd.DataFrame(data['value'], index=data['timestamp'])
    target = target_test_df.values
    num_series = target_test_df.shape[1]
    start_dt = target_test_df.index[0]
    test_lst = []

    for i in range(0, num_series):
        target_vec = target[:, i]
        dic = {FieldName.TARGET: target_vec, FieldName.START: start_dt}
        test_lst.append(dic)

    test_ds = ListDataset(test_lst, freq='1D')

    response_body = {}
    forecast_it = model.predict(test_ds)
    for idx, f in enumerate(forecast_it):
        response_body[f'item_{idx}'] = f.samples.mean(axis=0).tolist()

    return json.dumps(response_body)
Example #11
def test_minute_frequency():
    prediction_length = 1
    timeseries = {
        TIMESERIES_KEYS.START: "2021-01-15 12:40:00",
        TIMESERIES_KEYS.TARGET: np.array([12, 13]),
        TIMESERIES_KEYS.TARGET_NAME: "target",
        TIMESERIES_KEYS.TIME_COLUMN_NAME: "date",
    }
    frequency = "20min"
    gluon_dataset = ListDataset([timeseries], freq=frequency)
    model = Model(
        "simplefeedforward",
        model_parameters={"activated": True, "kwargs": {}},
        frequency=frequency,
        prediction_length=prediction_length,
        epoch=1,
        batch_size=8,
        num_batches_per_epoch=5,
    )
    evaluation_forecasts_df = model.train_evaluate(gluon_dataset, gluon_dataset, make_forecasts=True, retrain=True)[2]
    assert evaluation_forecasts_df["index"].iloc[0] == pd.Timestamp("2021-01-15 13:00:00")

    trained_model = TrainedModel(
        predictor=model.predictor,
        gluon_dataset=gluon_dataset,
        prediction_length=prediction_length,
        quantiles=[0.5],
        include_history=True,
    )
    trained_model.predict()
    forecasts_df = trained_model.get_forecasts_df(session="2021-01-01", model_label="TEST")
    assert forecasts_df["date"].iloc[0] == pd.Timestamp("2021-01-15 13:20:00")
Example #12
def test_feat_dynamic_real_bad_size():
    params = dict(freq="1D", prediction_length=3, prophet_params={})

    dataset = ListDataset(
        data_iter=[{
            "start": "2017-01-01",
            "target": np.array([1.0, 2.0, 3.0, 4.0]),
            "feat_dynamic_real": np.array([
                [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            ]),
        }],
        freq=params["freq"],
    )
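    # feat_dynamic_real only covers 6 steps, but Prophet needs target length + prediction_length = 4 + 3 = 7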

    with pytest.raises(AssertionError) as excinfo:
        predictor = ProphetPredictor(**params)
        list(predictor.predict(dataset))

    assert str(excinfo.value) == (
        "Length mismatch for dynamic real-valued feature #0: "
        "expected 7, got 6")
Example #13
def initialize_model() -> nn.HybridBlock:
    # dummy training data
    N = 10  # number of time series
    T = 100  # number of timesteps
    prediction_length = 24
    freq = "1H"
    custom_dataset = np.zeros(shape=(N, T))
    # the start date can be different for each time series
    start = pd.Timestamp("01-01-2019", freq=freq)
    train_ds = ListDataset(
        [{
            "target": x,
            "start": start
        } for x in custom_dataset[:, :-prediction_length]],
        freq=freq,
    )
    # create a simple model
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=[10],
        prediction_length=prediction_length,
        context_length=T,
        freq=freq,
        trainer=Trainer(
            ctx="cpu",
            epochs=1,
            learning_rate=1e-3,
            num_batches_per_epoch=1,
        ),
    )

    # train model
    predictor = estimator.train(train_ds)

    return predictor.prediction_net
Example #14
def test_train_loader_goes_over_all_data(num_workers) -> None:
    batch_size = 4
    num_batches_per_epoch = 4
    num_time_series = batch_size * num_batches_per_epoch * 3
    num_passes = 5
    num_epochs = num_passes * 3

    simple_data = [{
        "start": "2012-01-01",
        "target": np.random.uniform(size=40).astype(float).tolist(),
        "item_id": i,
    } for i in range(num_time_series)]

    def test_dataset(dataset):
        class ExactlyOneSampler(InstanceSampler):
            def __call__(self, ts: np.ndarray, a: int, b: int) -> np.ndarray:
                window_size = b - a + 1
                assert window_size > 0
                return np.array([a])

        transformation = InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=ExactlyOneSampler(),
            past_length=10,
            future_length=5,
            dummy_value=1.0,
        )

        dl = TrainDataLoader(
            dataset=dataset,
            transform=transformation,
            batch_size=batch_size,
            stack_fn=partial(batchify, ctx=current_context()),
            num_workers=num_workers,
        )

        item_ids = defaultdict(int)

        for epoch in range(num_epochs):
            for batch in islice(dl, num_batches_per_epoch):
                for item_id in batch["item_id"]:
                    item_ids[item_id] += 1

        for i in range(len(dataset)):
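            # 4 batches of 4 items per epoch over 15 epochs = 240 samples across 48 series, i.e. roughly num_passes (5) visits each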
            assert num_passes - 1 <= item_ids[i] <= num_passes + 1

    test_dataset(ListDataset(simple_data, freq="1H"))

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(tmpdir + "/data.json", "w") as f:
            for data in simple_data:
                json.dump(data, f)
                f.write("\n")

        test_dataset(FileDataset(Path(tmpdir), freq="1H"))
        test_dataset(FileDataset(Path(tmpdir), freq="1H", cache=True))
Example #15
def test_agg_lags(pred_length, rolling_lags):
    # create dummy dataset
    target = np.array([1, 1, 1, 2, 2, 3, 3, 4, 5, 6])
    start = pd.Timestamp("01-01-2019 01:00:00", freq="1H")
    freq = "1H"
    ds = ListDataset(
        [{FieldName.TARGET: target, FieldName.START: start}], freq=freq
    )

    # 2H aggregate lags
    lags_2H = [1, 2, 3, 4, 6]

    add_agg_lags = AddAggregateLags(
        target_field=FieldName.TARGET,
        output_field="lags_2H",
        pred_length=pred_length,
        base_freq=freq,
        agg_freq="2H",
        agg_lags=lags_2H,
        rolling_agg=rolling_lags,
    )
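    # each "2H" aggregate step spans two "1H" base steps, hence a ratio of 2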

    assert add_agg_lags.ratio == 2

    train_entry = next(add_agg_lags(iter(ds), is_train=True))
    test_entry = next(add_agg_lags(iter(ds), is_train=False))

    if rolling_lags:
        assert (
            add_agg_lags.valid_lags
            == valid_lags_rolling[f"prediction_length_{pred_length}"]
        )

        assert np.allclose(
            train_entry["lags_2H"],
            expected_lags_rolling[f"prediction_length_{pred_length}"]["train"],
        )

        assert np.allclose(
            test_entry["lags_2H"],
            expected_lags_rolling[f"prediction_length_{pred_length}"]["test"],
        )
    else:
        assert (
            add_agg_lags.valid_lags
            == valid_lags_calendar[f"prediction_length_{pred_length}"]
        )

        assert np.allclose(
            train_entry["lags_2H"],
            expected_lags_calendar[f"prediction_length_{pred_length}"][
                "train"
            ],
        )

        assert np.allclose(
            test_entry["lags_2H"],
            expected_lags_calendar[f"prediction_length_{pred_length}"]["test"],
        )
Example #16
def log1p_tds(dataset: TrainDatasets) -> TrainDatasets:
    """Create a new train datasets with targets log-transformed."""
    # Implementation note: currently, the only way is to eagerly load all timeseries in memory, and do the transform.
    train = ListDataset(dataset.train, freq=dataset.metadata.freq)
    log1p(train)

    if dataset.test is not None:
        test = ListDataset(dataset.test, freq=dataset.metadata.freq)
        log1p(test)
    else:
        test = None

    # fmt: off
    return TrainDatasets(
        dataset.metadata.copy(),  # Note: pydantic's deep copy.
        train=train,
        test=test)
Example #17
 def test_training_bad_seasonality_user_input(self):
     prediction_length = 1
     frequency = "3M"
     gluon_dataset = ListDataset(self.timeseries, freq=frequency)
     with pytest.raises(ValueError):
         estimator = AutoARIMAEstimator(prediction_length=prediction_length,
                                        freq=frequency,
                                        m=12)
Example #18
def get_dataset_and_transformation():
    # don't recompute, since this is expensive
    global _data_cache
    if _data_cache is not None:
        return _data_cache

    # create constant dataset with each time series having
    # variable length and unique constant integer entries
    dataset = ConstantDataset(
        num_steps=CD_NUM_STEPS, num_timeseries=CD_NUM_TIME_SERIES
    )
    list_dataset = list(dataset.train)
    for i, ts in enumerate(list_dataset):
        ts["start"] = pd.Timestamp(ts_input=ts["start"], freq=dataset.freq)
        # get randomness in the ts lengths
        ts["target"] = np.array(
            ts["target"] * random.randint(1, CD_MAX_LEN_MULTIPLICATION_FACTOR)
        )
    list_dataset = ListDataset(data_iter=list_dataset, freq=dataset.freq)
    list_dataset_pred_length = dataset.prediction_length

    # use every possible time point to split the time series
    transformation = Chain(
        [
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=UniformSplitSampler(
                    p=SPLITTING_SAMPLE_PROBABILITY  # THIS IS IMPORTANT FOR THE TEST
                ),
                past_length=CONTEXT_LEN,
                future_length=list_dataset_pred_length,
                dummy_value=1.0,
            ),
        ]
    )

    # original no multiprocessing processed validation dataset
    train_data_transformed_original = list(
        ValidationDataLoader(
            dataset=list_dataset,
            transform=transformation,
            batch_size=BATCH_SIZE,
            num_workers=0,  # This is the crucial difference
            ctx=current_context(),
        )
    )

    _data_cache = (
        list_dataset,
        transformation,
        list_dataset_pred_length,
        train_data_transformed_original,
    )

    return _data_cache
Example #19
def make_dummy_datasets_with_features(
    num_ts: int = 5,
    start: str = "2018-01-01",
    freq: str = "D",
    min_length: int = 5,
    max_length: int = 10,
    prediction_length: int = 3,
    cardinality: List[int] = [],
    num_feat_dynamic_real: int = 0,
) -> Tuple[ListDataset, ListDataset]:

    data_iter_train = []
    data_iter_test = []

    for k in range(num_ts):
        ts_length = randint(min_length, max_length)
        perc_zeros = 0.5
        mask = np.random.rand(ts_length) < perc_zeros
        target = np.array([1.0] * ts_length)
        target[mask] = 0
        data_entry_train = {
            FieldName.START: start,
            FieldName.TARGET: target,
        }
        if len(cardinality) > 0:
            data_entry_train[FieldName.FEAT_STATIC_CAT] = [
                randint(0, c) for c in cardinality
            ]
        data_entry_test = data_entry_train.copy()
        if num_feat_dynamic_real > 0:
            data_entry_train[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + k)] * ts_length
                for k in range(num_feat_dynamic_real)
            ]
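            # test entries get dynamic features that extend prediction_length steps past the target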
            data_entry_test[FieldName.FEAT_DYNAMIC_REAL] = [
                [float(1 + k)] * (ts_length + prediction_length)
                for k in range(num_feat_dynamic_real)
            ]
        data_iter_train.append(data_entry_train)
        data_iter_test.append(data_entry_test)

    return (
        ListDataset(data_iter=data_iter_train, freq=freq),
        ListDataset(data_iter=data_iter_test, freq=freq),
    )
Example #20
File: __init__.py Project: jroakes/atspy
def gluonts_dataframe(df):
    freqed = pd.infer_freq(df.index)
    if freqed == "MS":
        freq = "M"
        # start = df.index[0] + relativedelta(months=1)
    else:
        freq = freqed
    df = ListDataset([{"start": df.index[0], "target": df.values}], freq=freq)
    return df
Example #21
def test_multivariate_grouper_test(univariate_ts, multivariate_ts,
                                   test_fill_rule, max_target_dim) -> None:
    univariate_ds = ListDataset(univariate_ts, freq="1D")
    multivariate_ds = ListDataset(multivariate_ts,
                                  freq="1D",
                                  one_dim_target=False)

    grouper = MultivariateGrouper(
        test_fill_rule=test_fill_rule,
        num_test_dates=2,
        max_target_dim=max_target_dim,
    )
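    # grouping the univariate entries should reconstruct the reference multivariate target and start timestamp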

    for grouped_data, multivariate_data in zip(grouper(univariate_ds),
                                               multivariate_ds):
        assert (grouped_data["target"] == multivariate_data["target"]).all()

        assert grouped_data["start"] == multivariate_data["start"]
Example #22
def _test_nans_in_target(predictor: NPTSPredictor, dataset: Dataset) -> None:
    """
    Test that the model behaves as expected when the target time series
    contains NaN values.

    Parameters
    ----------
    predictor
        the predictor instance to test
    dataset
        a dataset (with targets without NaNs) to use as a base for the test
    """

    # a copy of dataset with 90% of the target entries NaNs
    ds_090pct_nans = ListDataset(
        data_iter=[
            _inject_nans_in_target(data_entry, p=0.9) for data_entry in dataset
        ],
        freq=predictor.freq,
    )

    # a copy of dataset with 100% of the target entries NaNs
    ds_100pct_nans = ListDataset(
        data_iter=[
            _inject_nans_in_target(data_entry, p=1.0) for data_entry in dataset
        ],
        freq=predictor.freq,
    )

    # assert that we can tolerate a high percentage of NaNs
    for forecast in predictor.predict(ds_090pct_nans):
        assert np.all(np.isfinite(forecast.samples)), "Forecast contains NaNs."

    # assert that an exception is thrown if 100% of the values are NaN
    with pytest.raises(GluonTSDataError) as excinfo:
        for _ in predictor.predict(ds_100pct_nans):
            pass
    assert (
        f"The last {predictor.context_length} positions of the target time "
        f"series are all NaN. Please increase the `context_length` "
        f"parameter of your NPTS model so the last "
        f"{predictor.context_length} positions of each target contain at "
        f"least one non-NaN value.") in str(excinfo.value)
Example #23
def make_dataset(N, train_length):
    # generates 2 ** N - 1 timeseries; series i holds train_length consecutive
    # integers starting at i * train_length
    n = 2 ** N - 1

    targets = np.arange(n * train_length).reshape((n, train_length))

    return ListDataset(
        [{"start": "2012-01-01", "target": targets[i, :]} for i in range(n)],
        freq="D",
    )
Example #24
 def predict(self, dataset: Dataset, **kwargs) -> Iterator[Forecast]:
     logger = logging.getLogger(__name__)
     for i, ts in enumerate(dataset, start=1):
         logger.info(f"training for time series {i} / {len(dataset)}")
         local_ds = ListDataset([ts], freq=self.freq)
         trained_pred = self.estimator.train(local_ds)
         logger.info(f"predicting for time series {i} / {len(dataset)}")
         predictions = trained_pred.predict(local_ds, **kwargs)
         for pred in predictions:
             yield pred
Example #25
File: _base.py Project: stadlmax/gluon-ts
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        time_granularity='1H',
        feat_static_cat=[
            CategoricalFeatureInfo(name='feat_static_cat_000',
                                   cardinality='10')
        ],
        feat_static_real=[BasicFeatureInfo(name='feat_static_real_000')],
    )

    start_date = '2000-01-01 00:00:00'

    train_ds = ListDataset(
        data_iter=[{
            'item': str(i),
            'start': start_date,
            'target': [float(i)] * 24,
            'feat_static_cat': [i],
            'feat_static_real': [float(i)],
        } for i in range(10)],
        freq=metadata.time_granularity,
    )

    test_ds = ListDataset(
        data_iter=[{
            'item': str(i),
            'start': start_date,
            'target': [float(i)] * 30,
            'feat_static_cat': [i],
            'feat_static_real': [float(i)],
        } for i in range(10)],
        freq=metadata.time_granularity,
    )

    info = DatasetInfo(
        name='constant_dataset',
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds
Example #26
File: _base.py Project: yx1215/gluon-ts
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(name="feat_static_cat_000",
                                   cardinality="10")
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    start_date = "2000-01-01 00:00:00"

    train_ds = ListDataset(
        data_iter=[{
            FieldName.ITEM_ID: str(i),
            FieldName.START: start_date,
            FieldName.TARGET: [float(i)] * 24,
            FieldName.FEAT_STATIC_CAT: [i],
            FieldName.FEAT_STATIC_REAL: [float(i)],
        } for i in range(10)],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[{
            FieldName.ITEM_ID: str(i),
            FieldName.START: start_date,
            FieldName.TARGET: [float(i)] * 30,
            FieldName.FEAT_STATIC_CAT: [i],
            FieldName.FEAT_STATIC_REAL: [float(i)],
        } for i in range(10)],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds
Example #27
File: _base.py Project: litieops/gluon-ts
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(name="feat_static_cat_000",
                                   cardinality="10")
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    start_date = "2000-01-01 00:00:00"

    train_ds = ListDataset(
        data_iter=[{
            "item_id": str(i),
            "start": start_date,
            "target": [float(i)] * 24,
            "feat_static_cat": [i],
            "feat_static_real": [float(i)],
        } for i in range(10)],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[{
            "item_id": str(i),
            "start": start_date,
            "target": [float(i)] * 30,
            "feat_static_cat": [i],
            "feat_static_real": [float(i)],
        } for i in range(10)],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds
Example #28
    def invocations() -> Response:
        request_data = request.data.decode("utf8").strip()

        # request_data can be empty, but .split() will produce a non-empty
        # list, which then means we try to decode an empty string, which
        # causes an error: `''.split() == ['']`
        if request_data:
            instances = list(map(json.loads, request_data.split("\n")))
        else:
            instances = []

        dataset = ListDataset(instances, predictor.freq)

        start_time = time.time()

        if settings.gluonts_batch_timeout > 0:
            predictions = with_timeout(
                make_predictions,
                args=(predictor, dataset, configuration),
                timeout=settings.gluonts_batch_timeout,
            )

            # predictions are None, when predictor timed out
            if predictions is None:
                logger.warning(f"predictor timed out for: {request_data}")
                FallbackPredictor = forecaster_type_by_name(
                    settings.gluonts_batch_fallback_predictor
                )
                fallback_predictor = FallbackPredictor(
                    freq=predictor.freq,
                    prediction_length=predictor.prediction_length,
                )

                predictions = make_predictions(
                    fallback_predictor, dataset, configuration
                )
        else:
            predictions = make_predictions(predictor, dataset, configuration)

        end_time = time.time()

        scored_instances.append(
            ScoredInstanceStat(
                amount=len(predictions), duration=end_time - start_time
            )
        )

        log_scored(when=end_time)

        for forward_field in settings.gluonts_forward_fields:
            for input_item, prediction in zip(dataset, predictions):
                prediction[forward_field] = input_item.get(forward_field)

        lines = list(map(json.dumps, map(jsonify_floats, predictions)))
        return Response("\n".join(lines), mimetype="application/jsonlines")
Example #29
    def _prepare_train_data(self, dataset: Dataset) -> ListDataset:
        logging.info("group training time-series to datasets")

        grouped_data = self._transform_target(self._align_data_entry, dataset)
        grouped_data = self._restrict_max_dimensionality(grouped_data)
        grouped_data[FieldName.START] = self.first_timestamp
        grouped_data[FieldName.FEAT_STATIC_CAT] = [0]

        return ListDataset([grouped_data],
                           freq=self.frequency,
                           one_dim_target=False)
Example #30
def trans_df2gluon(dflist, freq="1H"):
    datadictlist = []
    for data in dflist:
        rawdata = data.copy()
        rawdata['ds'] = pd.to_datetime(rawdata['ds'])
        rawdata.set_index('ds', inplace=True)
        rawdata = rawdata.resample(freq).mean()
        data_dict = {"start": rawdata.index[0], "target": rawdata.y}
        datadictlist.append(data_dict)
    dataset = ListDataset(datadictlist, freq=freq)
    return dataset