Exemple #1
0
    def deserialize(
            cls,
            path: Path,
            device: Optional[torch.device] = None) -> "PyTorchPredictor":
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())

        # deserialize transformation chain
        with (path / "input_transform.json").open("r") as fp:
            transformation = load_json(fp.read())

        # deserialize network
        with (path / f"prediction_net.json").open("r") as fp:
            prediction_net = load_json(fp.read())
        prediction_net.load_state_dict(
            torch.load(path / "prediction_net_state"))

        parameters["device"] = device

        return PyTorchPredictor(
            input_transform=transformation,
            prediction_net=prediction_net,
            **parameters,
        )
Exemple #2
0
    def deserialize(
            cls,
            path: Path,
            ctx: Optional[mx.Context] = None) -> "RepresentableBlockPredictor":
        ctx = ctx if ctx is not None else get_mxnet_context()

        with mx.Context(ctx):
            # deserialize constructor parameters
            with (path / "parameters.json").open("r") as fp:
                parameters = load_json(fp.read())

            # deserialize transformation chain
            with (path / "input_transform.json").open("r") as fp:
                transform = load_json(fp.read())

            # deserialize prediction network
            prediction_net = import_repr_block(path, "prediction_net")

            # input_names is derived from the prediction_net
            if "input_names" in parameters:
                del parameters["input_names"]

            parameters["ctx"] = ctx

            return RepresentableBlockPredictor(
                input_transform=transform,
                prediction_net=prediction_net,
                **parameters,
            )
Exemple #3
0
    def deserialize(
            cls,
            path: Path,
            ctx: Optional[mx.Context] = None) -> "SymbolBlockPredictor":
        ctx = ctx if ctx is not None else get_mxnet_context()

        with mx.Context(ctx):
            # deserialize constructor parameters
            with (path / "parameters.json").open("r") as fp:
                parameters = load_json(fp.read())

            parameters["ctx"] = ctx

            # deserialize transformation chain
            with (path / "input_transform.json").open("r") as fp:
                transform = load_json(fp.read())

            # deserialize prediction network
            num_inputs = len(parameters["input_names"])
            prediction_net = import_symb_block(num_inputs, path,
                                               "prediction_net")

            return SymbolBlockPredictor(
                input_transform=transform,
                prediction_net=prediction_net,
                **parameters,
            )
Exemple #4
0
def test_component_ctor():
    random.seed(5_432_671_244)

    A = 100
    B = 200
    C = 300

    x_list = [
        Foo(
            str(random.randint(0, A)),
            Complex(x=random.uniform(0, C), y=str(random.uniform(0, C))),
            b=random.uniform(0, B),
        ) for i in range(4)
    ]
    fields = [
        Foo(
            a=str(random.randint(0, A)),
            b=random.uniform(0, B),
            c=Complex(x=str(random.uniform(0, C)), y=random.uniform(0, C)),
        ) for i in range(5)
    ]
    x_dict = {
        i: Foo(
            b=random.uniform(0, B),
            a=str(random.randint(0, A)),
            c=Complex(x=str(random.uniform(0, C)), y=str(random.uniform(0,
                                                                        C))),
        )
        for i in range(6)
    }

    bar01 = Bar(x_list, fields=fields, x_dict=x_dict)
    bar02 = load_code(dump_code(bar01))
    bar03 = load_json(dump_json(bar02))

    def compare_tpes(x, y, z, tpe):
        assert tpe == type(x) == type(y) == type(z)

    def compare_vals(x, y, z):
        assert x == y == z

    compare_tpes(bar02.x_list, bar02.x_list, bar03.x_list, tpe=list)
    compare_tpes(bar02.x_dict, bar02.x_dict, bar03.x_dict, tpe=dict)
    compare_tpes(bar02.fields, bar02.fields, bar03.fields, tpe=list)

    compare_vals(len(bar02.x_list), len(bar02.x_list), len(bar03.x_list))
    compare_vals(len(bar02.x_dict), len(bar02.x_dict), len(bar03.x_dict))
    compare_vals(len(bar02.fields), len(bar02.fields), len(bar03.fields))

    compare_vals(bar02.x_list, bar02.x_list, bar03.x_list)
    compare_vals(bar02.x_dict, bar02.x_dict, bar03.x_dict)
    compare_vals(bar02.fields, bar02.fields, bar03.fields)

    baz01 = Baz(a="0", b="9", c=Complex(x="1", y="2"), d="42")
    baz02 = load_json(dump_json(baz01))

    assert type(baz01) == type(baz02)
    assert baz01 == baz02
Exemple #5
0
def import_repr_block(
    model_dir: Path, model_name: str, epoch: int = 0
) -> mx.gluon.HybridBlock:
    """
    Deserializes a representable Gluon block.

    Parameters
    ----------
    model_dir
        The path where the model is saved.
    model_name
        The name identifying the model.
    epoch
        The epoch number, which together with the `model_name` identifies the
        model parameters.

    Returns
    -------
    mx.gluon.HybridBlock:
        The deserialized block.
    """
    with (model_dir / f"{model_name}-network.json").open("r") as fp:
        rb = cast(mx.gluon.HybridBlock, load_json(fp.read()))
    rb.load_parameters(
        str(model_dir / f"{model_name}-{epoch:04}.params"),
        ctx=mx.current_context(),
        allow_missing=False,
        ignore_extra=False,
    )
    return rb
Exemple #6
0
    def deserialize(
            cls,
            path: Path,
            ctx: Optional[mx.Context] = None) -> "NBEATSEnsemblePredictor":
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())

        # basically save each predictor in its own sub-folder
        num_predictors = parameters["num_predictors"]
        num_digits = len(str(num_predictors))

        predictors = []
        # load all the predictors individually and also make sure not to load anything else by mistake
        predictor_locations = [
            f"predictor_{str(index).zfill(num_digits)}"
            for index in range(num_predictors)
        ]

        # deserialize predictors
        for sub_dir in predictor_locations:
            predictors.append(
                RepresentableBlockPredictor.deserialize(path / sub_dir, ctx))

        return NBEATSEnsemblePredictor(
            prediction_length=parameters["prediction_length"],
            freq=parameters["freq"],
            predictors=predictors,
            aggregation_method=parameters["aggregation_method"],
        )
Exemple #7
0
def assert_serializable(x: transform.Transformation):
    t = fqname_for(x.__class__)
    y = load_json(dump_json(x))
    z = load_code(dump_code(x))
    assert dump_json(x) == dump_json(
        y), f"Code serialization for transformer {t} does not work"
    assert dump_code(x) == dump_code(
        z), f"JSON serialization for transformer {t} does not work"
Exemple #8
0
def import_symb_block(
    num_inputs: int, model_dir: Path, model_name: str, epoch: int = 0
) -> mx.gluon.SymbolBlock:
    """
    Deserializes a hybridized Gluon `HybridBlock` as a `SymbolBlock`.

    Parameters
    ----------
    num_inputs
        The number of inputs of the serialized block.
    model_dir
        The path where the model is saved.
    model_name
        The name identifying the model.
    epoch
        The epoch number, which together with the `model_name` identifies the
        model parameters.

    Returns
    -------
    mx.gluon.SymbolBlock
        The deserialized block.
    """
    if num_inputs == 1:
        input_names = ["data"]
    else:
        input_names = [f"data{i}" for i in range(num_inputs)]

    # FIXME: prevents mxnet from failing with empty saved parameters list
    # FIXME: https://github.com/apache/incubator-mxnet/issues/17488
    param_file: Optional[str] = str(
        model_dir / f"{model_name}-{epoch:04}.params"
    )
    if not mx.nd.load(param_file):
        param_file = None

    # FIXME: mx.gluon.SymbolBlock cannot infer float_type and uses default
    # np.float32
    # FIXME: https://github.com/apache/incubator-mxnet/issues/11849
    sb = mx.gluon.SymbolBlock.imports(
        symbol_file=str(model_dir / f"{model_name}-symbol.json"),
        input_names=input_names,
        param_file=param_file,
        ctx=mx.current_context(),
    )

    # FIXME: try to retrieve input/output format
    # FIXME: https://github.com/apache/incubator-mxnet/issues/17488
    format_json_path = model_dir / f"{model_name}-in_out_format.json"
    if format_json_path.exists():
        with format_json_path.open("r") as fp:
            formats = load_json(fp.read())
            sb._in_format = formats["in_format"]
            sb._out_format = formats["out_format"]

    return sb
Exemple #9
0
def train(arguments):
    """
    Generic train method that trains a specified estimator on a specified
    dataset.
    """

    logger.info("Downloading estimator config.")
    estimator_config = Path(arguments.estimator) / "estimator.json"
    with estimator_config.open() as config_file:
        estimator = serde.load_json(config_file.read())

    logger.info("Downloading dataset.")
    if arguments.s3_dataset is None:
        # load built in dataset
        dataset = datasets.get_dataset(arguments.dataset)
    else:
        # load custom dataset
        s3_dataset_dir = Path(arguments.s3_dataset)
        dataset = common.load_datasets(
            metadata=s3_dataset_dir,
            train=s3_dataset_dir / "train",
            test=s3_dataset_dir / "test",
        )

    logger.info("Starting model training.")
    predictor = estimator.train(dataset.train)
    forecast_it, ts_it = backtest.make_evaluation_predictions(
        dataset=dataset.test,
        predictor=predictor,
        num_samples=int(arguments.num_samples),
    )

    logger.info("Starting model evaluation.")
    evaluator = Evaluator(quantiles=eval(arguments.quantiles))

    agg_metrics, item_metrics = evaluator(ts_it,
                                          forecast_it,
                                          num_series=len(list(dataset.test)))

    # required for metric tracking.
    for name, value in agg_metrics.items():
        logger.info(f"gluonts[metric-{name}]: {value}")

    # save the evaluation results
    metrics_output_dir = Path(arguments.output_data_dir)
    with open(metrics_output_dir / "agg_metrics.json", "w") as f:
        json.dump(agg_metrics, f)
    with open(metrics_output_dir / "item_metrics.csv", "w") as f:
        item_metrics.to_csv(f, index=False)

    # save the model
    model_output_dir = Path(arguments.model_dir)
    predictor.serialize(model_output_dir)
Exemple #10
0
    def deserialize(cls, path: Path):
        try:
            # deserialize constructor parameters
            with (path / "parameters.json").open("r") as fp:
                parameters = load_json(fp.read())

            # deserialize transformation chain
            with (path / "input_transform.json").open("r") as fp:
                transform = load_json(fp.read())

            # deserialize prediction network
            num_inputs = len(parameters["input_names"])
            prediction_net = import_symb_block(num_inputs, path,
                                               "prediction_net")

            return SymbolBlockPredictor(
                input_transform=transform,
                prediction_net=prediction_net,
                **parameters,
            )
        except Exception as e:
            raise IOError(f"Cannot deserialize {fqname_for(cls)}") from e
Exemple #11
0
    def deserialize(cls, path: Path):
        try:
            # deserialize constructor parameters
            with (path / 'parameters.json').open('r') as fp:
                parameters = load_json(fp.read())

            # deserialize transformation chain
            with (path / 'input_transform.json').open('r') as fp:
                transform = load_json(fp.read())

            # deserialize prediction network
            prediction_net = import_repr_block(path, 'prediction_net')

            # input_names is derived from the prediction_net
            if 'input_names' in parameters:
                del parameters['input_names']

            return RepresentableBlockPredictor(
                input_transform=transform,
                prediction_net=prediction_net,
                **parameters,
            )
        except Exception as e:
            raise IOError(f'Cannot deserialize {fqname_for(cls)}') from e
Exemple #12
0
    def deserialize(
        cls, path: Path, ctx: Optional[mx.Context] = None
    ) -> "DeepRenewalProcessPredictor":
        repr_predictor = super().deserialize(path, ctx)
        ctx = repr_predictor.ctx

        with mx.Context(ctx):
            # deserialize constructor parameters
            with (path / "parameters.json").open("r") as fp:
                parameters = load_json(fp.read())
            parameters["ctx"] = ctx

            return DeepRenewalProcessPredictor(
                input_transform=repr_predictor.input_transform,
                prediction_net=repr_predictor.prediction_net,
                **parameters,
            )
Exemple #13
0
    def deserialize(cls,
                    path: Path,
                    ctx: Optional[mx.Context] = None,
                    **kwargs) -> "NBEATSEnsemblePredictor":
        """
        Load a serialized NBEATSEnsemblePredictor from the given path.

        Parameters
        ----------
        path
            Path to the serialized files predictor.
        ctx
            Optional mxnet context parameter to be used with the predictor.
            If nothing is passed will use the GPU if available and CPU
            otherwise.
        """
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())

        # basically save each predictor in its own sub-folder
        num_predictors = parameters["num_predictors"]
        num_digits = len(str(num_predictors))

        predictors = []
        # load all the predictors individually and also make sure not to load
        # anything else by mistake
        predictor_locations = [
            f"predictor_{str(index).zfill(num_digits)}"
            for index in range(num_predictors)
        ]

        # deserialize predictors
        for sub_dir in predictor_locations:
            predictors.append(
                RepresentableBlockPredictor.deserialize(path / sub_dir, ctx))

        return NBEATSEnsemblePredictor(
            prediction_length=parameters["prediction_length"],
            freq=parameters["freq"],
            predictors=predictors,
            aggregation_method=parameters["aggregation_method"],
        )
Exemple #14
0
    def deserialize(
        cls,
        path: Path,
        # TODO this is temporary, we should make the callable object serializable in the first place
        scaling: Callable[[pd.Series], Tuple[pd.Series,
                                             float]] = mean_abs_scaling,
        **kwargs,
    ) -> "Predictor":
        # deserialize constructor parameters
        with (path / "parameters.json").open("r") as fp:
            parameters = load_json(fp.read())
        loaded_ag_path = parameters["ag_path"]
        del parameters["ag_path"]
        # load tabular model
        ag_model = AutogluonTabularPredictor.load(loaded_ag_path)

        return TabularPredictor(ag_model=ag_model,
                                scaling=scaling,
                                **parameters)
Exemple #15
0
def decode_sagemaker_parameter(value: str) -> Union[list, dict, str]:
    """
    All values passed through the SageMaker API are encoded as strings. Thus
    we pro-actively decode values that seem like arrays or dicts.

    Integer values (e.g. `"1"`) are handled by pydantic models further down
    the pipeline.
    """
    value = value.strip()

    # TODO: is this the right way to do things?
    #       what about fields which start which match the pattern for
    #       some reason?
    is_list = value.startswith("[") and value.endswith("]")
    is_dict = value.startswith("{") and value.endswith("}")

    if is_list or is_dict:
        return load_json(value)
    else:
        return value
Exemple #16
0
def test_json_serialization(e) -> None:
    assert e == serde.load_json(serde.dump_json(e))
Exemple #17
0
def test_json_serialization(e) -> None:
    assert equals(e, serde.load_json(serde.dump_json(e)))
Exemple #18
0
 def deserialize(
         cls,
         path: Path,
         ctx: Optional[mx.Context] = None) -> "RepresentablePredictor":
     with (path / "predictor.json").open("r") as fp:
         return load_json(fp.read())
Exemple #19
0
 def deserialize(cls, path: Path):
     try:
         with (path / "predictor.json").open("r") as fp:
             return load_json(fp.read())
     except Exception as e:
         raise IOError(f"Cannot deserialize {fqname_for(cls)}") from e
Exemple #20
0
from gluonts.model.common import Tensor, NPArrayLike
from gluonts.mx.distribution.distribution import Distribution
from gluonts.mx.distribution import (
    Gaussian,
    StudentT,
    MixtureDistribution,
    GaussianOutput,
    StudentTOutput,
    LaplaceOutput,
    MultivariateGaussianOutput,
    MixtureDistributionOutput,
)
from gluonts.testutil import empirical_cdf
from gluonts.core.serde import dump_json, load_json

serialize_fn_list = [lambda x: x, lambda x: load_json(dump_json(x))]


def plot_samples(s: Tensor, bins: int = 100) -> None:
    from matplotlib import pyplot as plt

    s = s.asnumpy()
    plt.hist(s, bins=bins)
    plt.show()


BINS = np.linspace(-5, 5, 100)


def histogram(samples: NPArrayLike) -> np.ndarray:
    h, _ = np.histogram(samples, bins=BINS, density=True)
Exemple #21
0
def test_json_serialization(e) -> None:
    expected, actual = e, serde.load_json(serde.dump_json(e))
    assert check_equality(expected, actual)
Exemple #22
0
@pytest.mark.parametrize(
    "a",
    [
        mx.nd.random.uniform(shape=(3, 5, 2), dtype="float16"),
        mx.nd.random.uniform(shape=(3, 5, 2), dtype="float32"),
        mx.nd.random.uniform(shape=(3, 5, 2), dtype="float64"),
        mx.nd.array([[1, 2, 3], [-1, -2, 0]], dtype=np.uint8),
        mx.nd.array([[1, 2, 3], [-1, -2, 0]], dtype=np.int32),
        mx.nd.array([[1, 2, 3], [-1, -2, 0]], dtype=np.int64),
        mx.nd.array([[1, 2, 3], [1, 2, 0]], dtype=np.uint8),
    ],
)
@pytest.mark.parametrize(
    "serialize_fn",
    [
        lambda x: serde.load_json(serde.dump_json(x)),
        lambda x: serde.load_binary(serde.dump_binary(x)),
        lambda x: serde.load_code(serde.dump_code(x)),
    ],
)
def test_ndarray_serialization(a, serialize_fn) -> None:
    b = serialize_fn(a)
    assert type(a) == type(b)
    assert a.dtype == b.dtype
    assert a.shape == b.shape
    assert np.all((a == b).asnumpy())


def test_timestamp_encode_decode() -> None:
    now = pd.Timestamp.now()
    assert now == serde.decode(serde.encode(now))
Exemple #23
0
 def deserialize(cls, path: Path) -> "RepresentablePredictor":
     with (path / "predictor.json").open("r") as fp:
         return load_json(fp.read())