def test_add_time_features():
    tran = transform.AddTimeFeatures(
        start_field=FieldName.START,
        target_field=FieldName.TARGET,
        output_field="time_feat",
        time_features=[
            time_feature.DayOfWeek(),
            time_feature.DayOfMonth(),
            time_feature.MonthOfYear(),
        ],
        pred_length=10,
    )

    tran2 = clone(
        tran,
        {
            "time_features": [
                time_feature.DayOfWeek(),
                time_feature.DayOfMonth(),
            ]
        },
    )

    assert equals(tran, clone(tran))
    assert not equals(tran, tran2)

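# Note on helpers: the snippets in this section use `equals` and `clone`
# without showing their imports. A minimal sketch of what they are assumed to
# be (the module paths are an assumption based on gluonts.core; only
# serde.flat.clone is used explicitly further below):
from gluonts.core.component import equals
from gluonts.core.serde import flat


def clone(obj, overrides=None):
    # Round-trip `obj` through its flat serde encoding, optionally overriding
    # (possibly nested) constructor arguments.
    return flat.clone(obj, overrides or {})
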
def test_gluon_predictor():
    train_length = 100
    pred_length = 10
    estimator = CanonicalRNNEstimator("5min", train_length, pred_length)

    assert equals(estimator, clone(estimator))
    assert not equals(estimator, clone(estimator, {"freq": "1h"}))

def test_map_transformation():
    tran = transform.VstackFeatures(
        output_field="dynamic_feat",
        input_fields=["age", "time_feat"],
        drop_inputs=True,
    )

    assert equals(tran, clone(tran))
    assert not equals(tran, clone(tran, {"drop_inputs": False}))

def test_filter_transformation():
    prediction_length = 10

    tran1 = transform.FilterTransformation(
        lambda x: x["target"].shape[-1] > prediction_length
    )
    # serde.flat.clone(tran1) does not work on
    tran2 = transform.FilterTransformation(
        lambda x: x["target"].shape[-1] > prediction_length
    )
    tran3 = transform.FilterTransformation(
        condition=lambda x: x["target"].shape[-1] < prediction_length
    )

    assert equals(tran1, tran2)
    assert not equals(tran1, tran3)

def test_continuous_time_splitter():
    splitter = transform.ContinuousTimeInstanceSplitter(
        past_interval_length=1,
        future_interval_length=1,
        instance_sampler=transform.ContinuousTimePointSampler(),
    )
    splitter2 = transform.ContinuousTimeInstanceSplitter(
        past_interval_length=1,
        future_interval_length=1,
        instance_sampler=transform.ContinuousTimePointSampler(min_past=1.0),
    )

    assert equals(splitter, clone(splitter))
    assert not equals(splitter, splitter2)

def test_chain():
    chain = transform.Chain(
        trans=[
            transform.AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field="time_feat",
                time_features=[
                    time_feature.DayOfWeek(),
                    time_feature.DayOfMonth(),
                    time_feature.MonthOfYear(),
                ],
                pred_length=10,
            ),
            transform.AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field="age",
                pred_length=10,
                log_scale=True,
            ),
            transform.AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field="observed_values",
            ),
        ]
    )

    assert equals(chain, clone(chain))
    assert not equals(chain, clone(chain, {"trans": []}))

    another_chain = transform.Chain(
        trans=[
            transform.AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field="time_feat",
                time_features=[
                    time_feature.DayOfWeek(),
                    time_feature.DayOfMonth(),
                    time_feature.MonthOfYear(),
                ],
                pred_length=10,
            ),
            transform.AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field="age",
                pred_length=10,
                log_scale=False,
            ),
            transform.AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field="observed_values",
            ),
        ]
    )

    assert not equals(chain, another_chain)

def test_nested_params():
    deepar = DeepAREstimator(prediction_length=7, freq="D")

    assert equals(deepar, serde.flat.decode(serde.flat.encode(deepar)))

    deepar2 = serde.flat.clone(deepar, {"trainer.epochs": 999})
    assert deepar2.trainer.epochs == 999

def test_nested_params():
    b = B(a=A(value=42), b=99)

    assert equals(b, serde.flat.decode(serde.flat.encode(b)))

    b2 = serde.flat.clone(b, {"a.value": 999})
    assert b2.a.value == 999

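# The `test_nested_params` variant above refers to fixture classes `A` and `B`
# that are defined elsewhere. A minimal sketch of what they are assumed to
# look like (hypothetical definitions, using gluonts' @validated decorator so
# that serde and equals can recover the constructor arguments):
from gluonts.core.component import validated


class A:
    @validated()
    def __init__(self, value: int) -> None:
        self.value = value


class B:
    @validated()
    def __init__(self, a: A, b: int) -> None:
        self.a = a
        self.b = b
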
def __eq__(self, that):
    if type(self) != type(that):
        return False

    # TODO: also consider equality of the pipelines
    # if not equals(self.input_transform, that.input_transform):
    #     return False

    return equals(
        self.prediction_net.collect_params(),
        that.prediction_net.collect_params(),
    )

def test_instance_splitter():
    splitter = transform.InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=transform.ExpectedNumInstanceSampler(num_instances=4),
        past_length=100,
        future_length=10,
        time_series_fields=["dynamic_feat", "observed_values"],
    )
    splitter2 = clone(
        splitter,
        {
            "instance_sampler": transform.ExpectedNumInstanceSampler(
                num_instances=5
            )
        },
    )

    assert equals(splitter, clone(splitter))
    assert not equals(splitter, splitter2)

def test_dynamic_shell(
    train_env: TrainEnv,
    dynamic_server: "testutil.ServerFacade",
    caplog,
) -> None:
    execution_parameters = dynamic_server.execution_parameters()

    assert "BatchStrategy" in execution_parameters
    assert "MaxConcurrentTransforms" in execution_parameters
    assert "MaxPayloadInMB" in execution_parameters
    assert execution_parameters["BatchStrategy"] == "SINGLE_RECORD"
    assert execution_parameters["MaxPayloadInMB"] == 6

    configuration = {
        "num_eval_samples": 1,  # FIXME: this is ignored
        "output_types": ["mean", "samples"],
        "quantiles": [],
        **train_env.hyperparameters,
    }

    for entry in train_env.datasets["train"]:
        forecast = dynamic_server.invocations([entry], configuration)[0]

        for output_type in configuration["output_types"]:
            assert output_type in forecast

        act_mean = np.array(forecast["mean"])
        act_samples = np.array(forecast["samples"])

        mean = np.mean(entry["target"])
        exp_mean_shape = (prediction_length,)
        exp_samples_shape = (num_samples, prediction_length)
        exp_mean = mean * np.ones(shape=(prediction_length,))
        exp_samples = mean * np.ones(shape=exp_samples_shape)

        assert exp_mean_shape == act_mean.shape
        assert exp_samples_shape == act_samples.shape
        assert equals(exp_mean, act_mean)
        assert equals(exp_samples, act_samples)

def test_dynamic_batch_shell(
    batch_transform,
    train_env: TrainEnv,
    dynamic_server: "testutil.ServerFacade",
    caplog,
) -> None:
    execution_parameters = dynamic_server.execution_parameters()

    assert "BatchStrategy" in execution_parameters
    assert "MaxConcurrentTransforms" in execution_parameters
    assert "MaxPayloadInMB" in execution_parameters
    assert execution_parameters["BatchStrategy"] == "SINGLE_RECORD"
    assert execution_parameters["MaxPayloadInMB"] == 6

    for entry in train_env.datasets["train"]:
        entry["foo"] = 42
        forecast = dynamic_server.batch_invocations([entry])[0]

        for output_type in batch_transform["output_types"]:
            assert output_type in forecast
        assert forecast["foo"] == 42

        act_mean = np.array(forecast["mean"])
        act_samples = np.array(forecast["samples"])

        mean = np.mean(entry["target"])
        exp_mean_shape = (prediction_length,)
        exp_samples_shape = (num_samples, prediction_length)
        exp_mean = mean * np.ones(shape=(prediction_length,))
        exp_samples = mean * np.ones(shape=exp_samples_shape)

        assert exp_mean_shape == act_mean.shape
        assert exp_samples_shape == act_samples.shape
        assert equals(exp_mean, act_mean)
        assert equals(exp_samples, act_samples)

def check_equality(expected, actual) -> bool:
    if isinstance(expected, set):
        # Sets are serialized as lists; check that they contain the same
        # elements.
        return equals_list(
            sorted(expected, key=hash), sorted(actual, key=hash)
        )
    elif np.issubdtype(type(expected), np.integer):
        # Integer types are expected to be exactly equal.
        return np.equal(expected, actual)
    elif np.issubdtype(type(expected), np.inexact):
        # Floating point types are expected to be equal up to the tolerance
        # used by np.isclose.
        return np.allclose(expected, actual)
    else:
        return equals(expected, actual)

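# Quick illustration of the dispatch in check_equality above (hypothetical
# values; assumes numpy is imported as np):
assert check_equality(np.int64(3), 3)    # integers compared exactly
assert check_equality(1.0, 1.0 + 1e-12)  # floats compared via np.allclose
assert not check_equality(1.0, 1.1)
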
def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    with caplog.at_level(logging.DEBUG):
        dataset_info, train_ds, test_ds = constant_dataset()

        estimator = make_estimator(
            dataset_info.metadata.freq, dataset_info.prediction_length
        )
        evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
        backtest_metrics(train_ds, test_ds, estimator, evaluator)
        train_stats = calculate_dataset_statistics(train_ds)
        test_stats = calculate_dataset_statistics(test_ds)

        log_info = BacktestInformation.make_from_log_contents(caplog.text)

        assert train_stats == log_info.train_dataset_stats
        assert test_stats == log_info.test_dataset_stats
        assert equals(estimator, log_info.estimator)

        print(log_info)

def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    caplog.set_level(logging.DEBUG, logger="log.txt")

    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(
        dataset_info.metadata.time_granularity,
        dataset_info.prediction_length,
    )
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    backtest_metrics(train_ds, test_ds, estimator, evaluator)
    train_stats = calculate_dataset_statistics(train_ds)
    test_stats = calculate_dataset_statistics(test_ds)

    log_file = str(Path(__file__).parent / "log.txt")
    log_info = BacktestInformation.make_from_log(log_file)

    assert train_stats == log_info.train_dataset_stats
    assert test_stats == log_info.test_dataset_stats
    assert equals(estimator, log_info.estimator)

    print(log_info)

def test_json_serialization(e) -> None:
    expected, actual = e, serde.load_json(serde.dump_json(e))
    assert equals(expected, actual)

def __eq__(self, that):
    """
    Two RepresentablePredictor instances are considered equal if they have
    the same constructor arguments.
    """
    return equals(self, that)

def test_binary_serialization(e) -> None:
    assert equals(e, serde.load_binary(serde.dump_binary(e)))

def test_continuous_time_sampler():
    sampler = transform.ContinuousTimeUniformSampler(num_instances=4)

    assert equals(sampler, clone(sampler))
    assert not equals(sampler, clone(sampler, {"num_instances": 5}))

def test_exp_num_sampler():
    sampler = transform.ExpectedNumInstanceSampler(num_instances=4)

    assert equals(sampler, clone(sampler))
    assert not equals(sampler, clone(sampler, {"num_instances": 5}))

def test_code_serialization(e) -> None:
    expected, actual = e, serde.load_code(serde.dump_code(e))
    assert equals(expected, actual)

def test_equals_batch():
    assert equals(Batch(batch_size=10), Batch(batch_size=10))
    assert not equals(Batch(batch_size=10), Batch(batch_size=100))

def test_json_serialization(e) -> None:
    assert equals(e, serde.load_json(serde.dump_json(e)))

def test_code_serialization(e) -> None:
    assert equals(e, serde.load_code(serde.dump_code(e)))