def deserialize(
    cls, path: Path, device: Optional[torch.device] = None
) -> "PyTorchPredictor":
    # deserialize constructor parameters
    with (path / "parameters.json").open("r") as fp:
        parameters = load_json(fp.read())

    # deserialize transformation chain
    with (path / "input_transform.json").open("r") as fp:
        transformation = load_json(fp.read())

    # deserialize network
    with (path / "prediction_net.json").open("r") as fp:
        prediction_net = load_json(fp.read())
    prediction_net.load_state_dict(
        torch.load(path / "prediction_net_state")
    )

    parameters["device"] = device

    return PyTorchPredictor(
        input_transform=transformation,
        prediction_net=prediction_net,
        **parameters,
    )
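A minimal usage sketch for the deserializer above, assuming a directory previously written by the matching `serialize` call; the `my_model` path is hypothetical and the import location may vary between gluonts versions.

from pathlib import Path

import torch
from gluonts.torch.model.predictor import PyTorchPredictor

model_dir = Path("my_model")  # hypothetical directory written by serialize()
predictor = PyTorchPredictor.deserialize(model_dir, device=torch.device("cpu"))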
def deserialize(
    cls, path: Path, ctx: Optional[mx.Context] = None
) -> "RepresentableBlockPredictor":
    ctx = ctx if ctx is not None else get_mxnet_context()

    with mx.Context(ctx):
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())

        # deserialize transformation chain
        with (path / "input_transform.json").open("r") as fp:
            transform = load_json(fp.read())

        # deserialize prediction network
        prediction_net = import_repr_block(path, "prediction_net")

        # input_names is derived from the prediction_net
        if "input_names" in parameters:
            del parameters["input_names"]

        parameters["ctx"] = ctx

        return RepresentableBlockPredictor(
            input_transform=transform,
            prediction_net=prediction_net,
            **parameters,
        )
def deserialize(
    cls, path: Path, ctx: Optional[mx.Context] = None
) -> "SymbolBlockPredictor":
    ctx = ctx if ctx is not None else get_mxnet_context()

    with mx.Context(ctx):
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())
        parameters["ctx"] = ctx

        # deserialize transformation chain
        with (path / "input_transform.json").open("r") as fp:
            transform = load_json(fp.read())

        # deserialize prediction network
        num_inputs = len(parameters["input_names"])
        prediction_net = import_symb_block(num_inputs, path, "prediction_net")

        return SymbolBlockPredictor(
            input_transform=transform,
            prediction_net=prediction_net,
            **parameters,
        )
def test_component_ctor():
    random.seed(5_432_671_244)

    A = 100
    B = 200
    C = 300

    x_list = [
        Foo(
            str(random.randint(0, A)),
            Complex(x=random.uniform(0, C), y=str(random.uniform(0, C))),
            b=random.uniform(0, B),
        )
        for i in range(4)
    ]
    fields = [
        Foo(
            a=str(random.randint(0, A)),
            b=random.uniform(0, B),
            c=Complex(x=str(random.uniform(0, C)), y=random.uniform(0, C)),
        )
        for i in range(5)
    ]
    x_dict = {
        i: Foo(
            b=random.uniform(0, B),
            a=str(random.randint(0, A)),
            c=Complex(x=str(random.uniform(0, C)), y=str(random.uniform(0, C))),
        )
        for i in range(6)
    }

    bar01 = Bar(x_list, fields=fields, x_dict=x_dict)
    bar02 = load_code(dump_code(bar01))
    bar03 = load_json(dump_json(bar02))

    def compare_tpes(x, y, z, tpe):
        assert tpe == type(x) == type(y) == type(z)

    def compare_vals(x, y, z):
        assert x == y == z

    # compare the original object against both round-trips
    compare_tpes(bar01.x_list, bar02.x_list, bar03.x_list, tpe=list)
    compare_tpes(bar01.x_dict, bar02.x_dict, bar03.x_dict, tpe=dict)
    compare_tpes(bar01.fields, bar02.fields, bar03.fields, tpe=list)

    compare_vals(len(bar01.x_list), len(bar02.x_list), len(bar03.x_list))
    compare_vals(len(bar01.x_dict), len(bar02.x_dict), len(bar03.x_dict))
    compare_vals(len(bar01.fields), len(bar02.fields), len(bar03.fields))

    compare_vals(bar01.x_list, bar02.x_list, bar03.x_list)
    compare_vals(bar01.x_dict, bar02.x_dict, bar03.x_dict)
    compare_vals(bar01.fields, bar02.fields, bar03.fields)

    baz01 = Baz(a="0", b="9", c=Complex(x="1", y="2"), d="42")
    baz02 = load_json(dump_json(baz01))

    assert type(baz01) == type(baz02)
    assert baz01 == baz02
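The `Foo`/`Bar`/`Baz`/`Complex` fixtures used above are not shown here; a minimal sketch of what such a serde round-trippable component might look like, assuming gluonts' `validated` decorator (the field types and `__eq__` are illustrative only):

from gluonts.core.component import validated


class Complex:
    # validated() records the constructor arguments, which is what lets
    # dump_json/load_json reconstruct the object
    @validated()
    def __init__(self, x: float = 0.0, y: float = 0.0) -> None:
        self.x = x
        self.y = y

    def __eq__(self, other) -> bool:
        return (
            isinstance(other, Complex)
            and self.x == other.x
            and self.y == other.y
        )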
def import_repr_block(
    model_dir: Path, model_name: str, epoch: int = 0
) -> mx.gluon.HybridBlock:
    """
    Deserializes a representable Gluon block.

    Parameters
    ----------
    model_dir
        The path where the model is saved.
    model_name
        The name identifying the model.
    epoch
        The epoch number, which together with the `model_name` identifies
        the model parameters.

    Returns
    -------
    mx.gluon.HybridBlock:
        The deserialized block.
    """
    with (model_dir / f"{model_name}-network.json").open("r") as fp:
        rb = cast(mx.gluon.HybridBlock, load_json(fp.read()))
    rb.load_parameters(
        str(model_dir / f"{model_name}-{epoch:04}.params"),
        ctx=mx.current_context(),
        allow_missing=False,
        ignore_extra=False,
    )
    return rb
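A usage sketch matching the files this helper reads (`<name>-network.json` plus `<name>-<epoch>.params`); the `model` directory is hypothetical:

from pathlib import Path

import mxnet as mx

with mx.Context(mx.cpu()):
    net = import_repr_block(Path("model"), "prediction_net", epoch=0)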
def deserialize(
    cls, path: Path, ctx: Optional[mx.Context] = None
) -> "NBEATSEnsemblePredictor":
    # deserialize constructor parameters
    with (path / "parameters.json").open("r") as fp:
        parameters = load_json(fp.read())

    # each predictor was saved in its own zero-padded sub-folder
    num_predictors = parameters["num_predictors"]
    num_digits = len(str(num_predictors))

    predictors = []
    # load all the predictors individually and make sure not to load
    # anything else by mistake
    predictor_locations = [
        f"predictor_{str(index).zfill(num_digits)}"
        for index in range(num_predictors)
    ]

    # deserialize predictors
    for sub_dir in predictor_locations:
        predictors.append(
            RepresentableBlockPredictor.deserialize(path / sub_dir, ctx)
        )

    return NBEATSEnsemblePredictor(
        prediction_length=parameters["prediction_length"],
        freq=parameters["freq"],
        predictors=predictors,
        aggregation_method=parameters["aggregation_method"],
    )
def assert_serializable(x: transform.Transformation):
    t = fqname_for(x.__class__)
    y = load_json(dump_json(x))
    z = load_code(dump_code(x))
    assert dump_json(x) == dump_json(
        y
    ), f"JSON serialization for transformer {t} does not work"
    assert dump_code(x) == dump_code(
        z
    ), f"Code serialization for transformer {t} does not work"
def import_symb_block(
    num_inputs: int, model_dir: Path, model_name: str, epoch: int = 0
) -> mx.gluon.SymbolBlock:
    """
    Deserializes a hybridized Gluon `HybridBlock` as a `SymbolBlock`.

    Parameters
    ----------
    num_inputs
        The number of inputs of the serialized block.
    model_dir
        The path where the model is saved.
    model_name
        The name identifying the model.
    epoch
        The epoch number, which together with the `model_name` identifies
        the model parameters.

    Returns
    -------
    mx.gluon.SymbolBlock
        The deserialized block.
    """
    if num_inputs == 1:
        input_names = ["data"]
    else:
        input_names = [f"data{i}" for i in range(num_inputs)]

    # FIXME: prevents mxnet from failing with empty saved parameters list
    # FIXME: https://github.com/apache/incubator-mxnet/issues/17488
    param_file: Optional[str] = str(
        model_dir / f"{model_name}-{epoch:04}.params"
    )
    if not mx.nd.load(param_file):
        param_file = None

    # FIXME: mx.gluon.SymbolBlock cannot infer float_type and uses default
    # np.float32
    # FIXME: https://github.com/apache/incubator-mxnet/issues/11849
    sb = mx.gluon.SymbolBlock.imports(
        symbol_file=str(model_dir / f"{model_name}-symbol.json"),
        input_names=input_names,
        param_file=param_file,
        ctx=mx.current_context(),
    )

    # FIXME: try to retrieve input/output format
    # FIXME: https://github.com/apache/incubator-mxnet/issues/17488
    format_json_path = model_dir / f"{model_name}-in_out_format.json"
    if format_json_path.exists():
        with format_json_path.open("r") as fp:
            formats = load_json(fp.read())
            sb._in_format = formats["in_format"]
            sb._out_format = formats["out_format"]

    return sb
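A usage sketch, assuming a hypothetical `model` directory containing `prediction_net-symbol.json` and `prediction_net-0000.params`, as produced by `HybridBlock.export`:

from pathlib import Path

import mxnet as mx

with mx.Context(mx.cpu()):
    # a block that was hybridized and exported with three inputs
    sb = import_symb_block(3, Path("model"), "prediction_net")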
def train(arguments):
    """
    Generic train method that trains a specified estimator on a specified
    dataset.
    """
    logger.info("Downloading estimator config.")
    estimator_config = Path(arguments.estimator) / "estimator.json"
    with estimator_config.open() as config_file:
        estimator = serde.load_json(config_file.read())

    logger.info("Downloading dataset.")
    if arguments.s3_dataset is None:
        # load built-in dataset
        dataset = datasets.get_dataset(arguments.dataset)
    else:
        # load custom dataset
        s3_dataset_dir = Path(arguments.s3_dataset)
        dataset = common.load_datasets(
            metadata=s3_dataset_dir,
            train=s3_dataset_dir / "train",
            test=s3_dataset_dir / "test",
        )

    logger.info("Starting model training.")
    predictor = estimator.train(dataset.train)
    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=dataset.test,
        predictor=predictor,
        num_samples=int(arguments.num_samples),
    )

    logger.info("Starting model evaluation.")
    # arguments.quantiles is expected to hold a Python list literal,
    # e.g. "[0.1, 0.5, 0.9]"
    evaluator = Evaluator(quantiles=eval(arguments.quantiles))

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=len(list(dataset.test))
    )

    # required for metric tracking.
    for name, value in agg_metrics.items():
        logger.info(f"gluonts[metric-{name}]: {value}")

    # save the evaluation results
    metrics_output_dir = Path(arguments.output_data_dir)
    with open(metrics_output_dir / "agg_metrics.json", "w") as f:
        json.dump(agg_metrics, f)
    with open(metrics_output_dir / "item_metrics.csv", "w") as f:
        item_metrics.to_csv(f, index=False)

    # save the model
    model_output_dir = Path(arguments.model_dir)
    predictor.serialize(model_output_dir)
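A sketch of how `train` might be wired up from a SageMaker-style entry point; the flag names mirror the attributes read above, while the defaults and the dataset name are illustrative assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--estimator", type=str)  # dir containing estimator.json
parser.add_argument("--dataset", type=str, default="m4_hourly")
parser.add_argument("--s3-dataset", type=str, default=None)
parser.add_argument("--num-samples", type=int, default=100)
parser.add_argument("--quantiles", type=str, default="[0.1, 0.5, 0.9]")
parser.add_argument("--output-data-dir", type=str)
parser.add_argument("--model-dir", type=str)

# argparse maps --s3-dataset to arguments.s3_dataset, and so on
train(parser.parse_args())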
def deserialize(cls, path: Path):
    try:
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())

        # deserialize transformation chain
        with (path / "input_transform.json").open("r") as fp:
            transform = load_json(fp.read())

        # deserialize prediction network
        num_inputs = len(parameters["input_names"])
        prediction_net = import_symb_block(num_inputs, path, "prediction_net")

        return SymbolBlockPredictor(
            input_transform=transform,
            prediction_net=prediction_net,
            **parameters,
        )
    except Exception as e:
        raise IOError(f"Cannot deserialize {fqname_for(cls)}") from e
def deserialize(cls, path: Path):
    try:
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())

        # deserialize transformation chain
        with (path / "input_transform.json").open("r") as fp:
            transform = load_json(fp.read())

        # deserialize prediction network
        prediction_net = import_repr_block(path, "prediction_net")

        # input_names is derived from the prediction_net
        if "input_names" in parameters:
            del parameters["input_names"]

        return RepresentableBlockPredictor(
            input_transform=transform,
            prediction_net=prediction_net,
            **parameters,
        )
    except Exception as e:
        raise IOError(f"Cannot deserialize {fqname_for(cls)}") from e
def deserialize(
    cls, path: Path, ctx: Optional[mx.Context] = None
) -> "DeepRenewalProcessPredictor":
    repr_predictor = super().deserialize(path, ctx)
    ctx = repr_predictor.ctx

    with mx.Context(ctx):
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())
        parameters["ctx"] = ctx

        return DeepRenewalProcessPredictor(
            input_transform=repr_predictor.input_transform,
            prediction_net=repr_predictor.prediction_net,
            **parameters,
        )
def deserialize(
    cls, path: Path, ctx: Optional[mx.Context] = None, **kwargs
) -> "NBEATSEnsemblePredictor":
    """
    Load a serialized NBEATSEnsemblePredictor from the given path.

    Parameters
    ----------
    path
        Path to the files of the serialized predictor.
    ctx
        Optional mxnet context parameter to be used with the predictor.
        If nothing is passed, the GPU is used if available, the CPU
        otherwise.
    """
    # deserialize constructor parameters
    with (path / "parameters.json").open("r") as fp:
        parameters = load_json(fp.read())

    # each predictor was saved in its own zero-padded sub-folder
    num_predictors = parameters["num_predictors"]
    num_digits = len(str(num_predictors))

    predictors = []
    # load all the predictors individually and also make sure not to load
    # anything else by mistake
    predictor_locations = [
        f"predictor_{str(index).zfill(num_digits)}"
        for index in range(num_predictors)
    ]

    # deserialize predictors
    for sub_dir in predictor_locations:
        predictors.append(
            RepresentableBlockPredictor.deserialize(path / sub_dir, ctx)
        )

    return NBEATSEnsemblePredictor(
        prediction_length=parameters["prediction_length"],
        freq=parameters["freq"],
        predictors=predictors,
        aggregation_method=parameters["aggregation_method"],
    )
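A usage sketch, assuming a hypothetical `ensemble_model` directory laid out by the matching serialize step (a `parameters.json` plus one zero-padded `predictor_XX` sub-folder per ensemble member):

from pathlib import Path

import mxnet as mx

ensemble = NBEATSEnsemblePredictor.deserialize(
    Path("ensemble_model"), ctx=mx.cpu()
)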
def deserialize(
    cls,
    path: Path,
    # TODO this is temporary, we should make the callable object
    # serializable in the first place
    scaling: Callable[[pd.Series], Tuple[pd.Series, float]] = mean_abs_scaling,
    **kwargs,
) -> "Predictor":
    # deserialize constructor parameters
    with (path / "parameters.json").open("r") as fp:
        parameters = load_json(fp.read())

    loaded_ag_path = parameters["ag_path"]
    del parameters["ag_path"]

    # load tabular model
    ag_model = AutogluonTabularPredictor.load(loaded_ag_path)

    return TabularPredictor(ag_model=ag_model, scaling=scaling, **parameters)
def decode_sagemaker_parameter(value: str) -> Union[list, dict, str]:
    """
    All values passed through the SageMaker API are encoded as strings. Thus
    we proactively decode values that look like arrays or dicts. Integer
    values (e.g. `"1"`) are handled by pydantic models further down the
    pipeline.
    """
    value = value.strip()

    # TODO: is this the right way to do things?
    # what about string fields that just happen to match the pattern?
    is_list = value.startswith("[") and value.endswith("]")
    is_dict = value.startswith("{") and value.endswith("}")

    if is_list or is_dict:
        return load_json(value)
    else:
        return value
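Given the logic above, a few illustrative assertions (not taken from any test suite) showing what gets decoded and what is passed through unchanged:

assert decode_sagemaker_parameter("[0.1, 0.5, 0.9]") == [0.1, 0.5, 0.9]
assert decode_sagemaker_parameter('{"cardinality": [10]}') == {"cardinality": [10]}
assert decode_sagemaker_parameter("42") == "42"  # left for pydantic downstream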
def test_json_serialization(e) -> None:
    assert e == serde.load_json(serde.dump_json(e))
def test_json_serialization(e) -> None:
    assert equals(e, serde.load_json(serde.dump_json(e)))
def deserialize(
    cls, path: Path, ctx: Optional[mx.Context] = None
) -> "RepresentablePredictor":
    with (path / "predictor.json").open("r") as fp:
        return load_json(fp.read())
def deserialize(cls, path: Path):
    try:
        with (path / "predictor.json").open("r") as fp:
            return load_json(fp.read())
    except Exception as e:
        raise IOError(f"Cannot deserialize {fqname_for(cls)}") from e
import numpy as np

from gluonts.model.common import Tensor, NPArrayLike
from gluonts.mx.distribution.distribution import Distribution
from gluonts.mx.distribution import (
    Gaussian,
    StudentT,
    MixtureDistribution,
    GaussianOutput,
    StudentTOutput,
    LaplaceOutput,
    MultivariateGaussianOutput,
    MixtureDistributionOutput,
)
from gluonts.testutil import empirical_cdf
from gluonts.core.serde import dump_json, load_json

serialize_fn_list = [lambda x: x, lambda x: load_json(dump_json(x))]


def plot_samples(s: Tensor, bins: int = 100) -> None:
    from matplotlib import pyplot as plt

    s = s.asnumpy()
    plt.hist(s, bins=bins)
    plt.show()


BINS = np.linspace(-5, 5, 100)


def histogram(samples: NPArrayLike) -> np.ndarray:
    h, _ = np.histogram(samples, bins=BINS, density=True)
    return h
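`serialize_fn_list` is presumably consumed by parametrized distribution tests, checking each property both on the original distribution and on its serde round-trip; a minimal sketch of that pattern (the test body is an illustrative assumption):

import mxnet as mx
import pytest


@pytest.mark.parametrize("serialize_fn", serialize_fn_list)
def test_gaussian_roundtrip(serialize_fn) -> None:
    # identity for the first entry, JSON round-trip for the second
    distr = serialize_fn(Gaussian(mu=mx.nd.zeros(3), sigma=mx.nd.ones(3)))
    assert isinstance(distr, Gaussian)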
def test_json_serialization(e) -> None:
    expected, actual = e, serde.load_json(serde.dump_json(e))
    assert check_equality(expected, actual)
@pytest.mark.parametrize(
    "a",
    [
        mx.nd.random.uniform(shape=(3, 5, 2), dtype="float16"),
        mx.nd.random.uniform(shape=(3, 5, 2), dtype="float32"),
        mx.nd.random.uniform(shape=(3, 5, 2), dtype="float64"),
        mx.nd.array([[1, 2, 3], [-1, -2, 0]], dtype=np.uint8),
        mx.nd.array([[1, 2, 3], [-1, -2, 0]], dtype=np.int32),
        mx.nd.array([[1, 2, 3], [-1, -2, 0]], dtype=np.int64),
        mx.nd.array([[1, 2, 3], [1, 2, 0]], dtype=np.uint8),
    ],
)
@pytest.mark.parametrize(
    "serialize_fn",
    [
        lambda x: serde.load_json(serde.dump_json(x)),
        lambda x: serde.load_binary(serde.dump_binary(x)),
        lambda x: serde.load_code(serde.dump_code(x)),
    ],
)
def test_ndarray_serialization(a, serialize_fn) -> None:
    b = serialize_fn(a)
    assert type(a) == type(b)
    assert a.dtype == b.dtype
    assert a.shape == b.shape
    assert np.all((a == b).asnumpy())


def test_timestamp_encode_decode() -> None:
    now = pd.Timestamp.now()
    assert now == serde.decode(serde.encode(now))
def deserialize(cls, path: Path) -> "RepresentablePredictor":
    with (path / "predictor.json").open("r") as fp:
        return load_json(fp.read())