def test_gaussian_likelihood(mu: float, sigma: float, hybridize: bool):
    """
    Test to check that maximizing the likelihood recovers the parameters
    """
    # generate samples
    mus = mx.nd.zeros((NUM_SAMPLES,)) + mu
    sigmas = mx.nd.zeros((NUM_SAMPLES,)) + sigma

    distr = Gaussian(mus, sigmas)
    samples = distr.sample()

    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
    ]

    mu_hat, sigma_hat = maximum_likelihood_estimate_sgd(
        GaussianOutput(),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.001),
        num_epochs=PositiveInt(5),
    )

    assert (
        np.abs(mu_hat - mu) < TOL * mu
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (
        np.abs(sigma_hat - sigma) < TOL * sigma
    ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
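# Hedged usage sketch (not part of the test above): the same GaussianOutput API
# exercised by the likelihood test can be used directly to map network features
# to a Gaussian distribution and draw samples from it. The helper name and the
# feature shape (32, 16) are illustrative assumptions.
def example_gaussian_output_usage():
    distr_output = GaussianOutput()
    args_proj = distr_output.get_args_proj()
    args_proj.initialize()

    features = mx.nd.ones(shape=(32, 16))  # dummy network output
    distr_args = args_proj(features)  # projected to (mu, sigma)
    distr = distr_output.distribution(distr_args)
    return distr.sample()  # one sample per batch element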
def test_box_cox_transform(
    lam_1: float, lam_2: float, mu: float, sigma: float, hybridize: bool
):
    """
    Test to check that maximizing the likelihood recovers the parameters
    """
    # generate samples
    lambdas_1 = mx.nd.zeros((NUM_SAMPLES,)) + lam_1
    lambdas_2 = mx.nd.zeros((NUM_SAMPLES,)) + lam_2
    transform = InverseBoxCoxTransform(lambdas_1, lambdas_2)

    mus = mx.nd.zeros((NUM_SAMPLES,)) + mu
    sigmas = mx.nd.zeros((NUM_SAMPLES,)) + sigma
    gaussian_distr = Gaussian(mus, sigmas)

    # Here the base distribution is Gaussian, which is transformed to
    # non-Gaussian via the inverse Box-Cox transform.
    # Sampling from `trans_distr` gives non-Gaussian samples.
    trans_distr = TransformedDistribution(gaussian_distr, [transform])

    # Given the non-Gaussian samples, find the true parameters of the Box-Cox
    # transformation as well as of the underlying Gaussian distribution.
    samples = trans_distr.sample()

    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
        lam_1 - START_TOL_MULTIPLE * TOL * lam_1,
        inv_softplus(lam_2 - START_TOL_MULTIPLE * TOL * lam_2),
    ]

    mu_hat, sigma_hat, lam_1_hat, lam_2_hat = maximum_likelihood_estimate_sgd(
        TransformedDistributionOutput(
            GaussianOutput(),
            InverseBoxCoxTransformOutput(lb_obs=lam_2, fix_lambda_2=True),
        ),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(18),
    )

    assert (
        np.abs(lam_1_hat - lam_1) < TOL * lam_1
    ), f"lam_1 did not match: lam_1 = {lam_1}, lam_1_hat = {lam_1_hat}"
    # assert (
    #     np.abs(lam_2_hat - lam_2) < TOL * lam_2
    # ), f"lam_2 did not match: lam_2 = {lam_2}, lam_2_hat = {lam_2_hat}"

    assert np.abs(mu_hat - mu) < TOL * np.abs(
        mu
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (
        np.abs(sigma_hat - sigma) < TOL * sigma
    ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
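# For reference, the textbook two-parameter Box-Cox transform and its inverse
# (whether InverseBoxCoxTransform uses exactly this parametrization is an
# assumption to check against the library; this helper is illustrative only):
#
#   BoxCox(y; lam_1, lam_2) = ((y + lam_2) ** lam_1 - 1) / lam_1   if lam_1 != 0
#                           = log(y + lam_2)                        if lam_1 == 0
def inverse_box_cox(z, lam_1, lam_2):
    if lam_1 == 0:
        return np.exp(z) - lam_2
    return np.power(lam_1 * z + 1.0, 1.0 / lam_1) - lam_2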
def hyperparameters():
    return dict(
        ctx="cpu",
        epochs=1,
        learning_rate=1e-2,
        hybridize=True,
        num_hidden_dimensions=[3],
        num_batches_per_epoch=1,
        use_symbol_block_predictor=True,
        distr_output=GaussianOutput(),
    )
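# Hedged sketch of how a hyperparameter fixture like this is typically
# consumed: the dict is forwarded to an estimator constructor. The
# `from_hyperparameters` call and the freq/prediction_length defaults are
# assumptions for illustration, not taken from the original test harness.
def example_build_estimator(Estimator, hyperparameters, freq="H", prediction_length=3):
    return Estimator.from_hyperparameters(
        freq=freq, prediction_length=prediction_length, **hyperparameters
    )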
def test_nanmixture_gaussian_inference() -> None:
    nmdo = NanMixtureOutput(GaussianOutput())

    args_proj = nmdo.get_args_proj()
    args_proj.initialize()
    args_proj.hybridize()

    input = mx.nd.ones((NUM_SAMPLES))
    trainer = mx.gluon.Trainer(
        args_proj.collect_params(), "sgd", {"learning_rate": 0.00001}
    )

    mixture_samples = mx.nd.array(np_samples)

    N = 1000
    t = tqdm(list(range(N)))

    for _ in t:
        with mx.autograd.record():
            distr_args = args_proj(input)
            d = nmdo.distribution(distr_args)
            loss = d.loss(mixture_samples)
        loss.backward()
        loss_value = loss.mean().asnumpy()
        t.set_postfix({"loss": loss_value})
        trainer.step(NUM_SAMPLES)

    mu_hat = d.distribution.mu.asnumpy()
    sigma_hat = d.distribution.sigma.asnumpy()
    nan_prob_hat = d.nan_prob.asnumpy()

    assert (
        np.abs(mu - mu_hat) < TOL
    ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (
        np.abs(sigma - sigma_hat) < TOL
    ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
    assert (
        np.abs(nan_prob - nan_prob_hat) < TOL
    ), f"nan_prob did not match: nan_prob = {nan_prob}, nan_prob_hat = {nan_prob_hat}"
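# The test above relies on module-level `np_samples`, `mu`, `sigma`, and
# `nan_prob` defined outside this snippet. A plausible construction, mirroring
# the NaN-injection used for the categorical case further below; this helper
# and its defaults are illustrative assumptions, not the original definitions.
def make_nan_gaussian_samples(mu, sigma, nan_prob, num_samples=NUM_SAMPLES):
    samples = np.random.normal(mu, sigma, size=num_samples)
    # replace a `nan_prob` fraction of the Gaussian samples with NaN
    return np.where(
        np.random.uniform(size=num_samples) > nan_prob, samples, np.nan
    )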
    # check that histograms are close
    assert (
        diff(
            histogram(samples_mix.asnumpy()), histogram(samples_ref.asnumpy())
        )
        < 0.05
    )

    # can only calculate the cdf for Gaussians currently
    if isinstance(distr1, Gaussian) and isinstance(distr2, Gaussian):
        emp_cdf, edges = empirical_cdf(samples_mix.asnumpy())
        calc_cdf = mixture.cdf(mx.nd.array(edges)).asnumpy()
        assert np.allclose(calc_cdf[1:, :], emp_cdf, atol=1e-2)


@pytest.mark.parametrize(
    "distribution_outputs",
    [
        ((GaussianOutput(), GaussianOutput()),),
        ((GaussianOutput(), StudentTOutput(), LaplaceOutput()),),
        ((MultivariateGaussianOutput(3), MultivariateGaussianOutput(3)),),
    ],
)
@pytest.mark.parametrize("serialize_fn", serialize_fn_list)
def test_mixture_output(distribution_outputs, serialize_fn) -> None:
    mdo = MixtureDistributionOutput(*distribution_outputs)

    args_proj = mdo.get_args_proj()
    args_proj.initialize()

    input = mx.nd.ones(shape=(512, 30))
    distr_args = args_proj(input)
    d = mdo.distribution(distr_args)
), f"categorical dist: nan_prob did not match: nan_prob = {nan_prob}, nan_prob_hat = {nan_prob_hat}" n_cat = 3 cat_probs = np.array([0.2, 0.3, 0.5]) cat_samples = np.random.choice( list(range(n_cat)), p=cat_probs, size=NUM_SAMPLES ) cat_samples = np.where( np.random.uniform(size=(NUM_SAMPLES)) > nan_prob, cat_samples, np.nan ) @pytest.mark.parametrize( "distribution_output", [GaussianOutput(), StudentTOutput(), CategoricalOutput(num_cats=2),], ) @pytest.mark.parametrize("serialize_fn", serialize_fn_list) def test_nanmixture_output(distribution_output, serialize_fn) -> None: nmdo = NanMixtureOutput(distribution_output) args_proj = nmdo.get_args_proj() args_proj.initialize() input = mx.nd.ones(shape=(3, 2)) distr_args = args_proj(input) d = nmdo.distribution(distr_args) d = serialize_fn(d)
        num_forking=1,
        num_batches_per_epoch=1,
        use_symbol_block_predictor=True,
    )


@pytest.fixture(params=[MQCNNEstimator, MQRNNEstimator], ids=["mqcnn", "mqrnn"])
def Estimator(request):
    return request.param


@pytest.mark.parametrize("hybridize", [True, False])
@pytest.mark.parametrize(
    "quantiles, distr_output",
    [([0.1, 0.5, 0.9], None), (None, GaussianOutput())],
)
def test_accuracy(
    Estimator,
    accuracy_test,
    hyperparameters,
    hybridize,
    quantiles,
    distr_output,
):
    hyperparameters.update(
        num_batches_per_epoch=100,
        hybridize=hybridize,
        quantiles=quantiles,
        distr_output=distr_output,
    )
        ESTIMATOR: TransformerEstimator,
        TRAINER: Trainer,
    },
    "mqcnn": {
        LABEL: "MQ-CNN",
        CAN_USE_EXTERNAL_FEATURES: True,
        ESTIMATOR: MQCNNEstimator,
        TRAINER: Trainer,
    },
}

# These parameters are classes but are set as strings in the UI
CLASS_PARAMETERS = {
    "distr_output": {
        "StudentTOutput()": StudentTOutput(),
        "GaussianOutput()": GaussianOutput(),
        "NegativeBinomialOutput()": NegativeBinomialOutput(),
    },
    "model": {
        "ARIMA": ARIMA,
        "ETSModel": ETSModel,
    },
}


class ModelParameterError(ValueError):
    """Custom exception raised when the GluonTS model parameters chosen by
    the user are invalid."""

    pass
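# Hedged sketch of a hypothetical helper (not part of the module above) showing
# how the string values coming from the UI could be resolved to the class
# instances registered in CLASS_PARAMETERS, raising ModelParameterError on
# unknown values.
def resolve_class_parameter(name: str, value: str):
    try:
        return CLASS_PARAMETERS[name][value]
    except KeyError:
        raise ModelParameterError(
            f"Unknown value {value!r} for parameter {name!r}; "
            f"expected one of {list(CLASS_PARAMETERS.get(name, {}))}"
        )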
    NegativeBinomialOutput,
    PiecewiseLinearOutput,
    PoissonOutput,
    StudentTOutput,
    UniformOutput,
    DirichletOutput,
    DirichletMultinomialOutput,
    DeterministicOutput,
)


@pytest.mark.parametrize(
    "distr_out, data, loc, scale, expected_batch_shape, expected_event_shape",
    [
        (
            GaussianOutput(),
            mx.nd.random.normal(shape=(3, 4, 5, 6)),
            [None, mx.nd.ones(shape=(3, 4, 5))],
            [None, mx.nd.ones(shape=(3, 4, 5))],
            (3, 4, 5),
            (),
        ),
        (
            StudentTOutput(),
            mx.nd.random.normal(shape=(3, 4, 5, 6)),
            [None, mx.nd.ones(shape=(3, 4, 5))],
            [None, mx.nd.ones(shape=(3, 4, 5))],
            (3, 4, 5),
            (),
        ),
        (
    ZeroInflatedBetaOutput,
    ZeroInflatedNegativeBinomialOutput,
    ZeroInflatedPoissonOutput,
)


@pytest.mark.parametrize(
    "distr_output",
    [
        BetaOutput(),
        CategoricalOutput(num_cats=3),
        DeterministicOutput(value=42.0),
        DirichletMultinomialOutput(dim=3, n_trials=5),
        DirichletOutput(dim=4),
        EmpiricalDistributionOutput(
            num_samples=10, distr_output=GaussianOutput()
        ),
        GammaOutput(),
        GaussianOutput(),
        GenParetoOutput(),
        LaplaceOutput(),
        LogitNormalOutput(),
        LoglogisticOutput(),
        LowrankMultivariateGaussianOutput(dim=5, rank=2),
        MultivariateGaussianOutput(dim=4),
        NegativeBinomialOutput(),
        OneInflatedBetaOutput(),
        PiecewiseLinearOutput(num_pieces=10),
        PoissonOutput(),
        StudentTOutput(),
        UniformOutput(),
        WeibullOutput(),
n_cat = 3
cat_probs = np.array([0.2, 0.3, 0.5])
cat_samples = np.random.choice(
    list(range(n_cat)), p=cat_probs, size=NUM_SAMPLES
)
cat_samples = np.where(
    np.random.uniform(size=(NUM_SAMPLES)) > nan_prob, cat_samples, np.nan
)


@pytest.mark.parametrize(
    "distribution_output",
    [
        GaussianOutput(),
        StudentTOutput(),
        CategoricalOutput(num_cats=2),
    ],
)
@pytest.mark.parametrize("serialize_fn", serialize_fn_list)
def test_nanmixture_output(distribution_output, serialize_fn) -> None:
    nmdo = NanMixtureOutput(distribution_output)

    args_proj = nmdo.get_args_proj()
    args_proj.initialize()

    input = mx.nd.ones(shape=(3, 2))
    distr_args = args_proj(input)
        num_batches_per_epoch=1,
        use_symbol_block_predictor=True,
    )


@pytest.fixture(
    params=[MQCNNEstimator, MQRNNEstimator], ids=["mqcnn", "mqrnn"]
)
def Estimator(request):
    return request.param


@pytest.mark.parametrize("hybridize", [True, False])
@pytest.mark.parametrize(
    "quantiles, distr_output",
    [([0.1, 0.5, 0.9], None), (None, GaussianOutput())],
)
def test_accuracy(
    Estimator,
    accuracy_test,
    hyperparameters,
    hybridize,
    quantiles,
    distr_output,
):
    hyperparameters.update(
        num_batches_per_epoch=100,
        hybridize=hybridize,
        quantiles=quantiles,
        distr_output=distr_output,
    )