def maximum_likelihood_estimate_sgd(
    distr_output: DistributionOutput,
    samples: mx.ndarray,
    # NOTE(review): default is None, so this should be Optional[List[...]];
    # left unchanged in case typing.Optional is not imported at file top.
    init_biases: List[mx.ndarray.NDArray] = None,
    num_epochs: PositiveInt = PositiveInt(5),
    learning_rate: PositiveFloat = PositiveFloat(1e-2),
    hybridize: bool = True,
) -> List[np.ndarray]:
    """
    Recover the parameters of ``distr_output`` by SGD on ``samples``.

    A projection network maps a constant dummy input to the distribution
    arguments; its weights are trained to minimize ``distr.loss`` over the
    given samples.  The fitted parameters are returned as a flat list of
    numpy arrays.
    """
    model_ctx = mx.cpu()

    arg_proj = distr_output.get_args_proj()
    arg_proj.initialize()

    if hybridize:
        arg_proj.hybridize()

    # Optionally warm-start the projection biases so the optimization
    # begins near (but not at) the true parameters.
    if init_biases is not None:
        for param, bias in zip(arg_proj.proj, init_biases):
            param.params[param.prefix + "bias"].initialize(
                mx.initializer.Constant(bias), force_reinit=True)

    trainer = mx.gluon.Trainer(
        arg_proj.collect_params(),
        "sgd",
        {
            "learning_rate": learning_rate,
            "clip_gradient": 10.0
        },
    )

    # The input data to our model is one-dimensional
    dummy_data = mx.nd.array(np.ones((len(samples), 1)))

    train_data = mx.gluon.data.DataLoader(
        mx.gluon.data.ArrayDataset(dummy_data, samples),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    for e in range(num_epochs):
        cumulative_loss = 0
        num_batches = 0
        # inner loop
        for i, (data, sample_label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            sample_label = sample_label.as_in_context(model_ctx)
            with mx.autograd.record():
                distr_args = arg_proj(data)
                distr = distr_output.distribution(distr_args)
                loss = distr.loss(sample_label)
                if not hybridize:
                    # In imperative mode the loss shape can be checked
                    # against the distribution's batch shape.
                    assert loss.shape == distr.batch_shape
            loss.backward()
            trainer.step(BATCH_SIZE)
            num_batches += 1

            cumulative_loss += mx.nd.mean(loss).asscalar()
        assert not np.isnan(cumulative_loss)
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches))

    # Univariate case: each projected argument is already a flat array.
    if len(distr_args[0].shape) == 1:
        return [
            param.asnumpy()
            for param in arg_proj(mx.nd.array(np.ones((1, 1))))
        ]

    # alpha parameter of zero inflated Neg Bin was not returned using param[0]
    ls = [[p.asnumpy() for p in param]
          for param in arg_proj(mx.nd.array(np.ones((1, 1))))]
    return reduce(lambda x, y: x + y, ls)
def test_inflated_beta_likelihood(
    alpha: float,
    beta: float,
    hybridize: bool,
    inflated_at: str,
    zero_probability: float,
    one_probability: float,
) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters
    """
    # generate samples
    alphas = mx.nd.zeros((NUM_SAMPLES, )) + alpha
    betas = mx.nd.zeros((NUM_SAMPLES, )) + beta
    zero_probabilities = mx.nd.zeros((NUM_SAMPLES, )) + zero_probability
    one_probabilities = mx.nd.zeros((NUM_SAMPLES, )) + one_probability

    # Build the distribution to sample from, and the matching output object
    # whose parameters will be fitted, depending on where mass is inflated.
    if inflated_at == "zero":
        distr = ZeroInflatedBeta(alphas,
                                 betas,
                                 zero_probability=zero_probabilities)
        distr_output = ZeroInflatedBetaOutput()
    elif inflated_at == "one":
        distr = OneInflatedBeta(alphas,
                                betas,
                                one_probability=one_probabilities)
        distr_output = OneInflatedBetaOutput()
    else:
        distr = ZeroAndOneInflatedBeta(
            alphas,
            betas,
            zero_probability=zero_probabilities,
            one_probability=one_probabilities,
        )
        distr_output = ZeroAndOneInflatedBetaOutput()

    samples = distr.sample()

    # Start the optimization slightly away from the true alpha/beta.
    init_biases = [
        inv_softplus(alpha - START_TOL_MULTIPLE * TOL * alpha),
        inv_softplus(beta - START_TOL_MULTIPLE * TOL * beta),
    ]

    parameters = maximum_likelihood_estimate_sgd(
        distr_output,
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(10),
    )

    if inflated_at == "zero":
        alpha_hat, beta_hat, zero_probability_hat = parameters
        # NOTE(review): only this branch indexes [0] into the estimate; the
        # "one" branch compares the array directly — confirm intended.
        assert (
            np.abs(zero_probability_hat[0] - zero_probability) <
            TOL * zero_probability
        ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}"

    elif inflated_at == "one":
        alpha_hat, beta_hat, one_probability_hat = parameters
        assert (
            np.abs(one_probability_hat - one_probability) <
            TOL * one_probability
        ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}"
    else:
        (
            alpha_hat,
            beta_hat,
            zero_probability_hat,
            one_probability_hat,
        ) = parameters
        assert (
            np.abs(zero_probability_hat - zero_probability) <
            TOL * zero_probability
        ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}"
        assert (
            np.abs(one_probability_hat - one_probability) <
            TOL * one_probability
        ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}"

    # alpha/beta are checked for every inflation variant.
    assert (np.abs(alpha_hat - alpha) < TOL * alpha
            ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}"
    assert (np.abs(beta_hat - beta) < TOL * beta
            ), f"beta did not match: beta = {beta}, beta_hat = {beta_hat}"
def test_piecewise_linear(
    gamma: float,
    slopes: np.ndarray,
    knot_spacings: np.ndarray,
    hybridize: bool,
) -> None:
    """
    Test to check that minimizing the CRPS recovers the quantile function
    """
    num_samples = 500  # use a few samples for timeout failure

    # Replicate the parameters across the sample dimension.
    gammas = mx.nd.zeros((num_samples, )) + gamma
    slopess = mx.nd.zeros((num_samples, len(slopes))) + mx.nd.array(slopes)
    knot_spacingss = mx.nd.zeros(
        (num_samples, len(knot_spacings))) + mx.nd.array(knot_spacings)

    pwl_sqf = PiecewiseLinear(gammas, slopess, knot_spacingss)

    samples = pwl_sqf.sample()

    # Parameter initialization
    gamma_init = gamma - START_TOL_MULTIPLE * TOL * gamma
    slopes_init = slopes - START_TOL_MULTIPLE * TOL * slopes
    # BUGFIX: copy the array — the original code aliased the caller's
    # `knot_spacings`, so the in-place slice assignments below mutated the
    # (possibly shared/parametrized) input array between test runs.
    knot_spacings_init = knot_spacings.copy()
    # We perturb knot spacings such that even after the perturbation they sum to 1.
    mid = len(slopes) // 2
    knot_spacings_init[:mid] = (knot_spacings[:mid] -
                                START_TOL_MULTIPLE * TOL * knot_spacings[:mid])
    knot_spacings_init[mid:] = (knot_spacings[mid:] +
                                START_TOL_MULTIPLE * TOL * knot_spacings[mid:])

    init_biases = [gamma_init, slopes_init, knot_spacings_init]

    # check if it returns original parameters of mapped
    gamma_hat, slopes_hat, knot_spacings_hat = maximum_likelihood_estimate_sgd(
        PiecewiseLinearOutput(len(slopes)),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(20),
    )

    # Since the problem is highly non-convex we may not be able to recover the exact parameters
    # Here we check if the estimated parameters yield similar function evaluations at different quantile levels.
    quantile_levels = np.arange(0.1, 1.0, 0.1)

    # create a LinearSplines instance with the estimated parameters to have access to .quantile
    pwl_sqf_hat = PiecewiseLinear(
        mx.nd.array(gamma_hat),
        mx.nd.array(slopes_hat).expand_dims(axis=0),
        mx.nd.array(knot_spacings_hat).expand_dims(axis=0),
    )

    # Compute quantiles with the estimated parameters
    quantiles_hat = np.squeeze(
        pwl_sqf_hat.quantile_internal(
            mx.nd.array(quantile_levels).expand_dims(axis=0),
            axis=1).asnumpy())

    # Compute quantiles with the original parameters
    # Since params is replicated across samples we take only the first entry
    quantiles = np.squeeze(
        pwl_sqf.quantile_internal(
            mx.nd.array(quantile_levels).expand_dims(axis=0).repeat(
                axis=0, repeats=num_samples),
            axis=1,
        ).asnumpy()[0, :])

    for ix, (quantile,
             quantile_hat) in enumerate(zip(quantiles, quantiles_hat)):
        assert np.abs(quantile_hat - quantile) < TOL * quantile, (
            f"quantile level {quantile_levels[ix]} didn't match:"
            f" "
            f"q = {quantile}, q_hat = {quantile_hat}")
class BenchFunctests(Bench):
    """Bench settings for the functional-test benchmark."""

    # Degree of parallelism; defaults to DEFAULT_NPROC
    # (presumably the machine's processor count — confirm where it is defined).
    num_jobs: Op[PositiveInt] = PositiveInt(DEFAULT_NPROC)
from gluonts.model.predictor import Predictor
from gluonts.model.seasonal_naive import SeasonalNaivePredictor
from gluonts.support.pandas import forecast_start


def generate_random_dataset(num_ts: int, start_time: str, freq: str,
                            min_length: int, max_length: int) -> Dataset:
    """Yield `num_ts` series dicts with random lengths in
    [min_length, max_length) and uniform-random targets, all starting at
    `start_time` with frequency `freq`."""
    start_timestamp = pd.Timestamp(start_time, freq=freq)
    for _ in range(num_ts):
        ts_length = np.random.randint(low=min_length, high=max_length)
        target = np.random.uniform(size=(ts_length, ))
        data = {"target": target, "start": start_timestamp}
        yield data


# Shared fixtures for the predictor tests below.
PREDICTION_LENGTH = PositiveInt(30)
SEASON_LENGTH = PositiveInt(210)
START_TIME = "2018-01-03 14:37:12"  # That's a Wednesday
MIN_LENGTH = 300
MAX_LENGTH = 400
NUM_TS = 10


# NOTE(review): test_predictor continues beyond this chunk; its body is
# truncated here.
@pytest.mark.parametrize("predictor_cls",
                         [SeasonalNaivePredictor, Naive2Predictor])
@pytest.mark.parametrize(
    "freq", ["1min", "15min", "30min", "1H", "2H", "12H", "7D", "1W", "1M"])
def test_predictor(predictor_cls, freq: str):
    predictor = predictor_cls(
        freq=freq,
        prediction_length=PREDICTION_LENGTH,
class BenchBuild(Bench):
    """Bench settings for the build benchmark."""

    # Degree of parallelism; defaults to DEFAULT_NPROC.
    num_jobs: Op[PositiveInt] = PositiveInt(DEFAULT_NPROC)
    # Extra arguments for the configure step; empty by default
    # (presumably forwarded to the build's configure invocation — confirm).
    configure_args: EnvStr = EnvStr("")
class Bench(BaseModel):
    """Base configuration shared by the benchmark models."""

    # Whether this benchmark is active at all.
    enabled: bool = True
    # How many times the benchmark is executed.
    run_count: PositiveInt = PositiveInt(1)
def test_box_cox_tranform(
    lambdas: Tuple[float, float],
    mu_sigma: Tuple[float, float],
    hybridize: bool,
):
    """
    Check that SGD maximum-likelihood estimation recovers both the inverse
    Box-Cox transform parameters and the underlying Gaussian parameters.
    """
    # Unpack the test instance.
    lam_1, lam_2 = lambdas
    mu, sigma = mu_sigma

    # Broadcast the scalar parameters over the sample axis.
    lam_1_vec = mx.nd.zeros((NUM_SAMPLES,)) + lam_1
    lam_2_vec = mx.nd.zeros((NUM_SAMPLES,)) + lam_2
    box_cox_inv = InverseBoxCoxTransform(lam_1_vec, lam_2_vec)

    mu_vec = mx.nd.zeros((NUM_SAMPLES,)) + mu
    sigma_vec = mx.nd.zeros((NUM_SAMPLES,)) + sigma
    base_gaussian = Gaussian(mu_vec, sigma_vec)

    # The base Gaussian is pushed through the inverse Box-Cox transform, so
    # sampling from `transformed` yields non-Gaussian data; the task is to
    # recover both the transform and the base-distribution parameters from
    # those samples.
    transformed = TransformedDistribution(base_gaussian, box_cox_inv)
    samples = transformed.sample()

    # Start the optimization slightly away from the true values.
    init_biases = [
        mu - START_TOL_MULTIPLE * TOL * mu,
        inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma),
        lam_1 - START_TOL_MULTIPLE * TOL * lam_1,
        inv_softplus(lam_2 - START_TOL_MULTIPLE * TOL * lam_2),
    ]

    mu_hat, sigma_hat, lam_1_hat, lam_2_hat = maximum_likelihood_estimate_sgd(
        TransformedDistributionOutput(
            GaussianOutput(),
            InverseBoxCoxTransformOutput(lb_obs=lam_2, fix_lambda_2=True),
        ),
        samples,
        init_biases=init_biases,
        hybridize=hybridize,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(18),
    )

    assert (
        np.abs(lam_1_hat - lam_1) < TOL * lam_1
    ), f"lam_1 did not match: lam_1 = {lam_1}, lam_1_hat = {lam_1_hat}"
    # lam_2 is held fixed (fix_lambda_2=True), hence not asserted here.
    # assert (
    #     np.abs(lam_2_hat - lam_2) < TOL * lam_2
    # ), f"lam_2 did not match: lam_2 = {lam_2}, lam_2_hat = {lam_2_hat}"
    assert np.abs(mu_hat - mu) < TOL * np.abs(
        mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert (
        np.abs(sigma_hat - sigma) < TOL * sigma
    ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
def test_empirical_distribution(hybridize: bool) -> None:
    r"""
    This verifies if the loss implemented by `EmpiricalDistribution` is correct.
    This is done by recovering parameters of a parametric distribution not by
    maximizing likelihood but by optimizing CRPS loss on the Monte Carlo samples
    drawn from the underlying parametric distribution.

    More precisely, given observations `obs` drawn from the true distribution
    p(x; \theta^*), we solve

                \theta_hat = \argmax_{\theta} CRPS(obs, {x}_i)

                subject to:

                        x_i ~ p(x; \theta)

    and verify if \theta^* and \theta_hat agree.

    This test uses Multivariate Gaussian with diagonal covariance. Once
    multivariate CRPS is implemented in `EmpiricalDistribution` one could use
    `LowrankMultivariateGaussian` as well. Any univariate distribution whose
    `sample_rep` is differentiable can also be used in this test.
    """
    num_obs = 2000
    dim = 2

    # Multivariate CRPS is not implemented in `EmpiricalDistribution`.
    rank = 0
    W = None
    mu = np.arange(0, dim) / float(dim)
    D = np.eye(dim) * (np.arange(dim) / dim + 0.5)
    Sigma = D

    # True distribution to draw observations from (diagonal covariance).
    distr = LowrankMultivariateGaussian(
        mu=mx.nd.array([mu]),
        D=mx.nd.array([np.diag(D)]),
        W=W,
        dim=dim,
        rank=rank,
    )
    obs = distr.sample(num_obs).squeeze().asnumpy()

    # Fit by minimizing the CRPS of Monte Carlo samples against `obs`.
    theta_hat = maximum_likelihood_estimate_sgd(
        EmpiricalDistributionOutput(
            num_samples=200,
            distr_output=LowrankMultivariateGaussianOutput(dim=dim,
                                                           rank=rank,
                                                           sigma_init=0.2,
                                                           sigma_minimum=0.0),
        ),
        obs,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(25),
        init_biases=None,  # todo we would need to rework biases a bit to use it in the multivariate case
        hybridize=hybridize,
    )

    mu_hat, D_hat = theta_hat
    W_hat = None

    # Rebuild a distribution from the estimates to read off its covariance.
    distr = LowrankMultivariateGaussian(
        dim=dim,
        rank=rank,
        mu=mx.nd.array([mu_hat]),
        D=mx.nd.array([D_hat]),
        W=W_hat,
    )
    Sigma_hat = distr.variance.asnumpy()

    print(mu_hat, Sigma_hat)

    assert np.allclose(
        mu_hat, mu, atol=0.2,
        rtol=0.1), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"
def test_lowrank_multivariate_gaussian(hybridize: bool, rank: int) -> None:
    """
    Draw samples from a low-rank multivariate Gaussian and check that SGD
    maximum-likelihood estimation recovers its mean and covariance.
    """
    n_draws = 2000
    dim = 2

    mu = np.arange(0, dim) / float(dim)
    D = np.eye(dim) * (np.arange(dim) / dim + 0.5)

    # With rank 0 the covariance is purely diagonal; otherwise it also
    # carries the low-rank term W W^T.
    if rank <= 0:
        W = None
        Sigma = D
    else:
        W_np = np.sqrt(np.ones((dim, rank)) * 0.2)
        Sigma = D + W_np.dot(W_np.transpose())
        W = mx.nd.array([W_np])

    distr = LowrankMultivariateGaussian(
        mu=mx.nd.array([mu]),
        D=mx.nd.array([np.diag(D)]),
        W=W,
        dim=dim,
        rank=rank,
    )

    # Sanity-check the analytic variance before fitting.
    assert np.allclose(
        distr.variance[0].asnumpy(), Sigma, atol=0.1, rtol=0.1
    ), f"did not match: sigma = {Sigma}, sigma_hat = {distr.variance[0]}"

    samples = distr.sample(n_draws).squeeze().asnumpy()

    theta_hat = maximum_likelihood_estimate_sgd(
        LowrankMultivariateGaussianOutput(dim=dim,
                                          rank=rank,
                                          sigma_init=0.2,
                                          sigma_minimum=0.0),
        samples,
        learning_rate=PositiveFloat(0.01),
        num_epochs=PositiveInt(25),
        # todo we would need to rework biases a bit to use it in the
        # multivariate case
        init_biases=None,
        hybridize=hybridize,
    )

    # The fitted parameter list includes W only in the low-rank case.
    if rank > 0:
        mu_hat, D_hat, W_hat = theta_hat
        W_hat = mx.nd.array([W_hat])
    else:
        mu_hat, D_hat = theta_hat
        W_hat = None

    # Rebuild a distribution from the estimates to read off its covariance.
    distr = LowrankMultivariateGaussian(
        dim=dim,
        rank=rank,
        mu=mx.nd.array([mu_hat]),
        D=mx.nd.array([D_hat]),
        W=W_hat,
    )
    Sigma_hat = distr.variance.asnumpy()

    assert np.allclose(
        mu_hat, mu, atol=0.2,
        rtol=0.1), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
    assert np.allclose(
        Sigma_hat, Sigma, atol=0.1, rtol=0.1
    ), f"sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"