def repos( page: PositiveInt = PositiveInt(1), per_page: PositiveInt = PositiveInt(10), db: Session = Depends(get_db), ): controller: SearchController = get_search_controller(db) return controller.get_searches(int(page), int(per_page))
def repos( page: PositiveInt = PositiveInt(1), per_page: PositiveInt = PositiveInt(10), search_id: Optional[UUID] = None, db: Session = Depends(get_db), ): controller: RepoController = get_repo_controller(db) if search_id: search_id = str(search_id) # type: ignore return controller.get_repos(int(page), int(per_page), search_id=search_id) # type: ignore
def maximum_likelihood_estimate_sgd( distr_output: DistributionOutput, samples: mx.ndarray, init_biases: List[mx.ndarray.NDArray] = None, num_epochs: PositiveInt = PositiveInt(5), learning_rate: PositiveFloat = PositiveFloat(1e-2), hybridize: bool = True, ) -> Iterable[float]: model_ctx = mx.cpu() arg_proj = distr_output.get_args_proj() arg_proj.initialize() if hybridize: arg_proj.hybridize() if init_biases is not None: for param, bias in zip(arg_proj.proj, init_biases): param.params[param.prefix + "bias"].initialize( mx.initializer.Constant(bias), force_reinit=True ) trainer = mx.gluon.Trainer( arg_proj.collect_params(), "sgd", {"learning_rate": learning_rate, "clip_gradient": 10.0}, ) # The input data to our model is one-dimensional dummy_data = mx.nd.array(np.ones((len(samples), 1))) train_data = mx.gluon.data.DataLoader( mx.gluon.data.ArrayDataset(dummy_data, samples), batch_size=BATCH_SIZE, shuffle=True, ) for e in range(num_epochs): cumulative_loss = 0 num_batches = 0 # inner loop for i, (data, sample_label) in enumerate(train_data): data = data.as_in_context(model_ctx) sample_label = sample_label.as_in_context(model_ctx) with mx.autograd.record(): distr_args = arg_proj(data) distr = distr_output.distribution(distr_args) loss = distr.loss(sample_label) if not hybridize: assert loss.shape == distr.batch_shape loss.backward() trainer.step(BATCH_SIZE) num_batches += 1 cumulative_loss += mx.nd.mean(loss).asscalar() print("Epoch %s, loss: %s" % (e, cumulative_loss / num_batches)) return [ param[0].asnumpy() for param in arg_proj(mx.nd.array(np.ones((1, 1)))) ]
def test_dirichlet_multinomial(hybridize: bool) -> None: num_samples = 8000 dim = 3 n_trials = 500 alpha = np.array([1.0, 2.0, 3.0]) distr = DirichletMultinomial(dim=3, n_trials=n_trials, alpha=mx.nd.array(alpha)) cov = distr.variance.asnumpy() samples = distr.sample(num_samples) alpha_hat = maximum_likelihood_estimate_sgd( DirichletMultinomialOutput(dim=dim, n_trials=n_trials), samples, init_biases=None, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) distr = DirichletMultinomial(dim=3, n_trials=n_trials, alpha=mx.nd.array(alpha_hat)) cov_hat = distr.variance.asnumpy() assert np.allclose( alpha_hat, alpha, atol=0.1, rtol=0.1 ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}" assert np.allclose( cov_hat, cov, atol=0.1, rtol=0.1 ), f"Covariance did not match: cov = {cov}, cov_hat = {cov_hat}"
def test_gamma_likelihood(alpha: float, beta: float, hybridize: bool) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples alphas = mx.nd.zeros((NUM_SAMPLES, )) + alpha betas = mx.nd.zeros((NUM_SAMPLES, )) + beta distr = Gamma(alphas, betas) samples = distr.sample() init_biases = [ inv_softplus(alpha - START_TOL_MULTIPLE * TOL * alpha), inv_softplus(beta - START_TOL_MULTIPLE * TOL * beta), ] alpha_hat, beta_hat = maximum_likelihood_estimate_sgd( GammaOutput(), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(5), ) assert (np.abs(alpha_hat - alpha) < TOL * alpha ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}" assert (np.abs(beta_hat - beta) < TOL * beta), f"beta did not match: beta = {beta}, beta_hat = {beta_hat}"
def test_categorical_likelihood(num_cats: int, cat_probs: np.ndarray, hybridize: bool): """ Test to check that maximizing the likelihood recovers the parameters """ cat_prob = mx.nd.array(cat_probs) cat_probs = mx.nd.zeros((NUM_SAMPLES, num_cats)) + cat_prob distr = Categorical(cat_probs.log()) samples = distr.sample() cat_prob_init = mx.nd.random_uniform(1 - TOL, 1 + TOL, num_cats) * cat_prob cat_prob_init = cat_prob_init / cat_prob_init.sum() init_biases = [cat_prob_init] cat_log_prob_hat = maximum_likelihood_estimate_sgd( CategoricalOutput(num_cats), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(25), ) cat_prob_hat = np.exp(cat_log_prob_hat) prob_deviation = np.abs(cat_prob_hat - cat_prob.asnumpy()).flatten() tolerance = (TOL * cat_prob.asnumpy()).flatten() assert np.all( np.less(prob_deviation, tolerance) ), f"cat_prob did not match: cat_prob = {cat_prob}, cat_prob_hat = {cat_prob_hat}"
def test_poisson_likelihood(rate: float, hybridize: bool) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples rates = mx.nd.zeros(NUM_SAMPLES) + rate distr = Poisson(rates) samples = distr.sample() init_biases = [inv_softplus(rate - START_TOL_MULTIPLE * TOL * rate)] rate_hat = maximum_likelihood_estimate_sgd( PoissonOutput(), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(20), ) print("rate:", rate_hat) assert (np.abs(rate_hat[0] - rate) < TOL * rate), f"mu did not match: rate = {rate}, rate_hat = {rate_hat}"
def test_beta_likelihood(concentration1: float, concentration0: float) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples concentration1s = torch.zeros((NUM_SAMPLES, )) + concentration1 concentration0s = torch.zeros((NUM_SAMPLES, )) + concentration0 distr = Beta(concentration1s, concentration0s) samples = distr.sample() init_biases = [ inv_softplus(concentration1 - START_TOL_MULTIPLE * TOL * concentration1), inv_softplus(concentration0 - START_TOL_MULTIPLE * TOL * concentration0), ] concentration1_hat, concentration0_hat = maximum_likelihood_estimate_sgd( BetaOutput(), samples, init_biases=init_biases, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) assert ( np.abs(concentration1_hat - concentration1) < TOL * concentration1 ), f"concentration1 did not match: concentration1 = {concentration1}, concentration1_hat = {concentration1_hat}" assert ( np.abs(concentration0_hat - concentration0) < TOL * concentration0 ), f"concentration0 did not match: concentration0 = {concentration0}, concentration0_hat = {concentration0_hat}"
def maximum_likelihood_estimate_sgd( distr_output: DistributionOutput, samples: torch.Tensor, init_biases: List[np.ndarray] = None, num_epochs: PositiveInt = PositiveInt(5), learning_rate: PositiveFloat = PositiveFloat(1e-2), ): arg_proj = distr_output.get_args_proj(in_features=1) if init_biases is not None: for param, bias in zip(arg_proj.proj, init_biases): nn.init.constant_(param.bias, bias) dummy_data = torch.ones((len(samples), 1)) dataset = TensorDataset(dummy_data, samples) train_data = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True) optimizer = SGD(arg_proj.parameters(), lr=learning_rate) for e in range(num_epochs): cumulative_loss = 0 num_batches = 0 for i, (data, sample_label) in enumerate(train_data): optimizer.zero_grad() distr_args = arg_proj(data) distr = distr_output.distribution(distr_args) loss = -distr.log_prob(sample_label).mean() loss.backward() clip_grad_norm_(arg_proj.parameters(), 10.0) optimizer.step() num_batches += 1 cumulative_loss += loss.item() if len(distr_args[0].shape) == 1: return [ param.detach().numpy() for param in arg_proj(torch.ones((1, 1))) ] return [ param[0].detach().numpy() for param in arg_proj(torch.ones((1, 1))) ]
def test_gaussian_likelihood(mu: float, sigma: float, hybridize: bool): ''' Test to check that maximizing the likelihood recovers the parameters ''' # generate samples mus = mx.nd.zeros((NUM_SAMPLES, )) + mu sigmas = mx.nd.zeros((NUM_SAMPLES, )) + sigma distr = Gaussian(mus, sigmas) samples = distr.sample() init_biases = [ mu - START_TOL_MULTIPLE * TOL * mu, inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma), ] mu_hat, sigma_hat = maximum_likelihood_estimate_sgd( GaussianOutput(), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.001), num_epochs=PositiveInt(5), ) assert (np.abs(mu_hat - mu) < TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert (np.abs(sigma_hat - sigma) < TOL * sigma ), f"alpha did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
def repos_search( search_options: SearchOptions = Body( ..., example={ "qualifiers": [ {"language": "Elixir", "created": ">2020-01-01"}, {"language": "Python"}, ] }, ), page: PositiveInt = PositiveInt(1), per_page: PositiveInt = PositiveInt(10), db: Session = Depends(get_db), ): controller: RepoController = get_repo_controller(db) return controller.search_repos(search_options, int(page), int(per_page))
def test_neg_binomial(mu_alpha: Tuple[float, float], hybridize: bool) -> None: ''' Test to check that maximizing the likelihood recovers the parameters ''' # test instance mu, alpha = mu_alpha # generate samples mus = mx.nd.zeros((NUM_SAMPLES, )) + mu alphas = mx.nd.zeros((NUM_SAMPLES, )) + alpha neg_bin_distr = NegativeBinomial(mu=mus, alpha=alphas) samples = neg_bin_distr.sample() init_biases = [ inv_softplus(mu - START_TOL_MULTIPLE * TOL * mu), inv_softplus(alpha + START_TOL_MULTIPLE * TOL * alpha), ] mu_hat, alpha_hat = maximum_likelihood_estimate_sgd( NegativeBinomialOutput(), samples, hybridize=hybridize, init_biases=init_biases, num_epochs=PositiveInt(15), ) assert (np.abs(mu_hat - mu) < TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert (np.abs(alpha_hat - alpha) < TOL * alpha ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}"
def test_deterministic_l2(mu: float, hybridize: bool) -> None: ''' Test to check that maximizing the likelihood recovers the parameters. This tests uses the Gaussian distribution with fixed variance and sample mean. This essentially reduces to determistic L2. ''' # generate samples mu = mu mus = mx.nd.zeros(NUM_SAMPLES) + mu deterministic_distr = Gaussian(mu=mus, sigma=0.1 * mx.nd.ones_like(mus)) samples = deterministic_distr.sample() class GaussianFixedVarianceOutput(GaussianOutput): @classmethod def domain_map(cls, F, mu, sigma): sigma = 0.1 * F.ones_like(sigma) return mu.squeeze(axis=-1), sigma.squeeze(axis=-1) mu_hat, _ = maximum_likelihood_estimate_sgd( GaussianFixedVarianceOutput(), samples, init_biases=[3 * mu, 0.1], hybridize=hybridize, num_epochs=PositiveInt(1), ) assert (np.abs(mu_hat - mu) < TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}"
def test_weibull_likelihood(rate: float, shape: float, hybridize: bool) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples rates = mx.nd.zeros((NUM_SAMPLES, )) + rate shapes = mx.nd.zeros((NUM_SAMPLES, )) + shape distr = Weibull(rates, shapes) samples = distr.sample() init_biases = [ inv_softplus(rate - START_TOL_MULTIPLE * TOL * rate), inv_softplus(shape - START_TOL_MULTIPLE * TOL * shape), ] rate_hat, shape_hat = maximum_likelihood_estimate_sgd( WeibullOutput(), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) print("rate:", rate_hat, "shape:", shape_hat) assert (np.abs(rate_hat - rate) < TOL * rate), f"rate did not match: rate = {rate}, rate_hat = {rate_hat}" assert (np.abs(shape_hat - shape) < TOL * shape ), f"shape did not match: shape = {shape}, shape_hat = {shape_hat}"
def test_genpareto_likelihood(xi: float, beta: float, hybridize: bool) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples xis = mx.nd.zeros((NUM_SAMPLES, )) + xi betas = mx.nd.zeros((NUM_SAMPLES, )) + beta distr = GenPareto(xis, betas) samples = distr.sample() init_biases = [ inv_softplus(xi - START_TOL_MULTIPLE * TOL * xi), inv_softplus(beta - START_TOL_MULTIPLE * TOL * beta), ] xi_hat, beta_hat = maximum_likelihood_estimate_sgd( GenParetoOutput(), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) print("XI:", xi_hat, "BETA:", beta_hat) assert (np.abs(xi_hat - xi) < TOL * xi), f"alpha did not match: xi = {xi}, xi_hat = {xi_hat}" assert (np.abs(beta_hat - beta) < TOL * beta), f"beta did not match: beta = {beta}, beta_hat = {beta_hat}"
def test_loglogistic_likelihood(mu: float, sigma: float, hybridize: bool) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples mus = mx.nd.zeros((NUM_SAMPLES, )) + mu sigmas = mx.nd.zeros((NUM_SAMPLES, )) + sigma distr = Loglogistic(mus, sigmas) samples = distr.sample() init_biases = [ mu - START_TOL_MULTIPLE * TOL * mu, inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma), ] mu_hat, sigma_hat = maximum_likelihood_estimate_sgd( LoglogisticOutput(), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) print("mu:", mu_hat, "sigma:", sigma_hat) assert (np.abs(mu_hat - mu) < TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert (np.abs(sigma_hat - sigma) < TOL * sigma ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
def test_gamma_likelihood(concentration: float, rate: float) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples concentrations = torch.zeros((NUM_SAMPLES,)) + concentration rates = torch.zeros((NUM_SAMPLES,)) + rate distr = Gamma(concentrations, rates) samples = distr.sample() init_biases = [ inv_softplus(concentration - START_TOL_MULTIPLE * TOL * concentration), inv_softplus(rate - START_TOL_MULTIPLE * TOL * rate), ] concentration_hat, rate_hat = maximum_likelihood_estimate_sgd( GammaOutput(), samples, init_biases=init_biases, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) assert ( np.abs(concentration_hat - concentration) < TOL * concentration ), f"concentration did not match: concentration = {concentration}, concentration_hat = {concentration_hat}" assert ( np.abs(rate_hat - rate) < TOL * rate ), f"rate did not match: rate = {rate}, rate_hat = {rate_hat}"
def test_binned_likelihood(num_bins: float, bin_probabilites: np.ndarray, hybridize: bool): ''' Test to check that maximizing the likelihood recovers the parameters ''' bin_prob = mx.nd.array(bin_probabilites) bin_center = mx.nd.array(np.logspace(-1, 1, num_bins)) # generate samples bin_probs = mx.nd.zeros((NUM_SAMPLES, num_bins)) + bin_prob bin_centers = mx.nd.zeros((NUM_SAMPLES, num_bins)) + bin_center distr = Binned(bin_probs, bin_centers) samples = distr.sample() # add some jitter to the uniform initialization and normalize bin_prob_init = mx.nd.random_uniform(1 - TOL, 1 + TOL, num_bins) * bin_prob bin_prob_init = bin_prob_init / bin_prob_init.sum() init_biases = [bin_prob_init] bin_prob_hat, = maximum_likelihood_estimate_sgd( BinnedOutput(list(bin_center.asnumpy())), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(25), ) assert all( mx.nd.abs(mx.nd.array(bin_prob_hat) - bin_prob) < TOL * bin_prob ), f"bin_prob did not match: bin_prob = {bin_prob}, bin_prob_hat = {bin_prob_hat}"
def test_crud_list_query(mocker, loop, mock_sqlalchemy_filters): def __query(*_args, **__kwargs): return mock_query apply_filters, apply_sort = mock_sqlalchemy_filters mock_query = mocker.Mock() mock_query.options = mocker.Mock(side_effect=__query) mock_query.offset = mocker.Mock(side_effect=__query) mock_query.limit = mocker.Mock(side_effect=__query) mock_query.all = mocker.Mock(return_value=[]) session = mocker.Mock() session.query = mocker.Mock(return_value=mock_query) filter_spec = [{"filter1": "value1"}] sort_spec = [{"sort1": "value1"}] options = ("option1", "option2") offset = NonNegativeInt(10) limit = PositiveInt(50) loop.run_until_complete( crud.list_instances(Person, session, filter_spec, sort_spec, offset, limit, options)) assert apply_filters.call_args == mocker.call(mock_query, filter_spec) assert apply_sort.call_args == mocker.call(mock_query, sort_spec) assert mock_query.options.call_args == mocker.call(options) assert mock_query.offset.call_args == mocker.call(offset) assert mock_query.limit.call_args == mocker.call(limit)
def test_dirichlet(hybridize: bool) -> None: num_samples = 2000 dim = 3 alpha = np.random.gamma(shape=1, size=dim) + 0.5 distr = Dirichlet(alpha=mx.nd.array(alpha)) cov = distr.variance.asnumpy() samples = distr.sample(num_samples) alpha_hat = maximum_likelihood_estimate_sgd( DirichletOutput(dim=dim), samples, init_biases=None, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) distr = Dirichlet(alpha=mx.nd.array(alpha_hat)) cov_hat = distr.variance.asnumpy() assert np.allclose( alpha_hat, alpha, atol=0.1, rtol=0.1 ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}" assert np.allclose( cov_hat, cov, atol=0.1, rtol=0.1 ), f"Covariance did not match: cov = {cov}, cov_hat = {cov_hat}"
def test_box_cox_tranform( lambdas: Tuple[float, float], mu_sigma: Tuple[float, float], hybridize: bool, ): ''' Test to check that maximizing the likelihood recovers the parameters ''' # test instance lam_1, lam_2 = lambdas mu, sigma = mu_sigma # generate samples lamdas_1 = mx.nd.zeros((NUM_SAMPLES, )) + lam_1 lamdas_2 = mx.nd.zeros((NUM_SAMPLES, )) + lam_2 transform = InverseBoxCoxTransform(lamdas_1, lamdas_2) mus = mx.nd.zeros((NUM_SAMPLES, )) + mu sigmas = mx.nd.zeros((NUM_SAMPLES, )) + sigma gausian_distr = Gaussian(mus, sigmas) # Here the base distribution is Guassian which is transformed to # non-Gaussian via the inverse Box-Cox transform. # Sampling from `trans_distr` gives non-Gaussian samples trans_distr = TransformedDistribution(gausian_distr, transform) # Given the non-Gaussian samples find the true parameters # of the Box-Cox transformation as well as the underlying Gaussian distribution. samples = trans_distr.sample() init_biases = [ mu - START_TOL_MULTIPLE * TOL * mu, inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma), lam_1 - START_TOL_MULTIPLE * TOL * lam_1, inv_softplus(lam_2 - START_TOL_MULTIPLE * TOL * lam_2), ] mu_hat, sigma_hat, lam_1_hat, lam_2_hat = maximum_likelihood_estimate_sgd( TransformedDistributionOutput( GaussianOutput(), InverseBoxCoxTransformOutput(lb_obs=lam_2, fix_lambda_2=True), ), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.01), num_epochs=PositiveInt(18), ) assert (np.abs(lam_1_hat - lam_1) < TOL * lam_1 ), f"lam_1 did not match: lam_1 = {lam_1}, lam_1_hat = {lam_1_hat}" # assert ( # np.abs(lam_2_hat - lam_2) < TOL * lam_2 # ), f"lam_2 did not match: lam_2 = {lam_2}, lam_2_hat = {lam_2_hat}" assert np.abs(mu_hat - mu) < TOL * np.abs( mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert (np.abs(sigma_hat - sigma) < TOL * sigma ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}"
def test_lowrank_multivariate_gaussian(hybridize: bool) -> None: num_samples = 2000 dim = 2 rank = 1 mu = np.arange(0, dim) / float(dim) D = np.eye(dim) * (np.arange(dim) / dim + 0.5) W = np.sqrt(np.ones((dim, rank)) * 0.2) Sigma = D + W.dot(W.transpose()) distr = LowrankMultivariateGaussian( mu=mx.nd.array([mu]), D=mx.nd.array([np.diag(D)]), W=mx.nd.array([W]), dim=dim, rank=rank, ) assert np.allclose( distr.variance[0].asnumpy(), Sigma, atol=0.1, rtol=0.1 ), f"did not match: sigma = {Sigma}, sigma_hat = {distr.variance[0]}" samples = distr.sample(num_samples).squeeze().asnumpy() mu_hat, D_hat, W_hat = maximum_likelihood_estimate_sgd( LowrankMultivariateGaussianOutput(dim=dim, rank=rank, sigma_init=0.2, sigma_minimum=0.0), samples, learning_rate=PositiveFloat(0.01), num_epochs=PositiveInt(25), init_biases= None, # todo we would need to rework biases a bit to use it in the multivariate case hybridize=hybridize, ) distr = LowrankMultivariateGaussian( dim=dim, rank=rank, mu=mx.nd.array([mu_hat]), D=mx.nd.array([D_hat]), W=mx.nd.array([W_hat]), ) Sigma_hat = distr.variance.asnumpy() assert np.allclose( mu_hat, mu, atol=0.2, rtol=0.1), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert np.allclose( Sigma_hat, Sigma, atol=0.1, rtol=0.1 ), f"sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"
def test_inflated_neg_binomial_likelihood( mu: float, alpha: float, zero_probability: float, hybridize: bool, ) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples num_samples = 2000 # Required for convergence distr = ZeroInflatedNegativeBinomialOutput().distribution(distr_args=[ mx.nd.array([[1 - zero_probability, zero_probability]]), # mixture probs mx.nd.array([mu, alpha]), # loc, shape of Neg Bin mx.nd.array([0.0]), ]) distr_output = ZeroInflatedNegativeBinomialOutput() samples = distr.sample(num_samples).squeeze() init_biases = None ( (_, zero_probability_hat), mu_hat, alpha_hat, _, ) = maximum_likelihood_estimate_sgd( distr_output=distr_output, samples=samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.1), num_epochs=PositiveInt(20), ) assert ( np.abs(zero_probability_hat - zero_probability) < TOL * zero_probability ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}" assert (np.abs(mu_hat - mu) < TOL * mu), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert (np.abs(alpha_hat - alpha) < TOL * alpha ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}"
def test_studentT_likelihood( mu_sigma_nu: Tuple[float, float, float], hybridize: bool ) -> None: ''' Test to check that maximizing the likelihood recovers the parameters ''' # test instance mu, sigma, nu = mu_sigma_nu # generate samples mus = mx.nd.zeros((NUM_SAMPLES, 1)) + mu sigmas = mx.nd.zeros((NUM_SAMPLES, 1)) + sigma nus = mx.nd.zeros((NUM_SAMPLES, 1)) + nu distr = StudentT(mus, sigmas, nus) samples = distr.sample() # nu takes very long to learn, so we initialize it at the true value. # transform used is softplus(x) + 2 init_bias = [ mu - START_TOL_MULTIPLE * TOL * mu, inv_softplus(sigma - START_TOL_MULTIPLE * TOL * sigma), inv_softplus(nu - 2), ] mu_hat, sigma_hat, nu_hat = maximum_likelihood_estimate_sgd( StudentTOutput(), samples, init_biases=init_bias, hybridize=hybridize, num_epochs=PositiveInt(10), learning_rate=1e-2, ) assert ( np.abs(mu_hat - mu) < TOL * mu ), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert ( np.abs(sigma_hat - sigma) < TOL * sigma ), f"sigma did not match: sigma = {sigma}, sigma_hat = {sigma_hat}" assert ( np.abs(nu_hat - nu) < TOL * nu ), "nu0 did not match: nu0 = %s, nu_hat = %s" % (nu, nu_hat)
def test_inflated_poisson_likelihood( rate: float, hybridize: bool, zero_probability: float, ) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples num_samples = 2000 # Required for convergence distr = ZeroInflatedPoissonOutput().distribution(distr_args=[ mx.nd.array([[1 - zero_probability, zero_probability]]), mx.nd.array([rate]), mx.nd.array([0.0]), ]) distr_output = ZeroInflatedPoissonOutput() samples = distr.sample(num_samples).squeeze() init_biases = None (_, zero_probability_hat), rate_hat, _ = maximum_likelihood_estimate_sgd( distr_output=distr_output, samples=samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.15), num_epochs=PositiveInt(25), ) assert ( np.abs(zero_probability_hat - zero_probability) < TOL * zero_probability ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}" assert (np.abs(rate_hat - rate) < TOL * rate), f"rate did not match: rate = {rate}, rate_hat = {rate_hat}"
def test_multivariate_gaussian() -> None: num_samples = 2000 dim = 2 mu = np.arange(0, dim) / float(dim) L_diag = np.ones((dim, )) L_low = 0.1 * np.ones((dim, dim)) * np.tri(dim, k=-1) L = np.diag(L_diag) + L_low Sigma = L.dot(L.transpose()) distr = MultivariateGaussian(mu=mx.nd.array(mu), L=mx.nd.array(L)) samples = distr.sample(num_samples) mu_hat, L_hat = maximum_likelihood_estimate_sgd( MultivariateGaussianOutput(dim=dim), samples, init_biases= None, # todo we would need to rework biases a bit to use it in the multivariate case hybridize=False, learning_rate=PositiveFloat(0.01), num_epochs=PositiveInt(10), ) distr = MultivariateGaussian(mu=mx.nd.array([mu_hat]), L=mx.nd.array([L_hat])) Sigma_hat = distr.variance[0].asnumpy() assert np.allclose( mu_hat, mu, atol=0.1, rtol=0.1), f"mu did not match: mu = {mu}, mu_hat = {mu_hat}" assert np.allclose( Sigma_hat, Sigma, atol=0.1, rtol=0.1 ), f"Sigma did not match: sigma = {Sigma}, sigma_hat = {Sigma_hat}"
from gluonts.dataset.common import Dataset from gluonts.support.pandas import forecast_start def generate_random_dataset( num_ts: int, start_time: str, freq: str, min_length: int, max_length: int ) -> Dataset: start_timestamp = pd.Timestamp(start_time, freq=freq) for _ in range(num_ts): ts_length = np.random.randint(low=min_length, high=max_length) target = np.random.uniform(size=(ts_length,)) data = {"target": target, "start": start_timestamp} yield data PREDICTION_LENGTH = PositiveInt(30) SEASON_LENGTH = PositiveInt(210) START_TIME = "2018-01-03 14:37:12" # That's a Wednesday MIN_LENGTH = 300 MAX_LENGTH = 400 NUM_TS = 10 @pytest.mark.parametrize( "predictor_cls", [SeasonalNaivePredictor, Naive2Predictor] ) @pytest.mark.parametrize( "freq", ["1min", "15min", "30min", "1H", "2H", "12H", "7D", "1W", "1M"] ) def test_seasonal_naive(predictor_cls, freq: str): predictor = predictor_cls(
class BenchFunctests(Bench): num_jobs: Op[PositiveInt] = PositiveInt(DEFAULT_NPROC)
def test_piecewise_linear( gamma: float, slopes: np.ndarray, knot_spacings: np.ndarray, hybridize: bool, ) -> None: ''' Test to check that minimizing the CRPS recovers the quantile function ''' num_samples = 500 # use a few samples for timeout failure gammas = mx.nd.zeros((num_samples, )) + gamma slopess = mx.nd.zeros((num_samples, len(slopes))) + mx.nd.array(slopes) knot_spacingss = mx.nd.zeros( (num_samples, len(knot_spacings))) + mx.nd.array(knot_spacings) pwl_sqf = PiecewiseLinear(gammas, slopess, knot_spacingss) samples = pwl_sqf.sample() # Parameter initialization gamma_init = gamma - START_TOL_MULTIPLE * TOL * gamma slopes_init = slopes - START_TOL_MULTIPLE * TOL * slopes knot_spacings_init = knot_spacings # We perturb knot spacings such that even after the perturbation they sum to 1. mid = len(slopes) // 2 knot_spacings_init[:mid] = (knot_spacings[:mid] - START_TOL_MULTIPLE * TOL * knot_spacings[:mid]) knot_spacings_init[mid:] = (knot_spacings[mid:] + START_TOL_MULTIPLE * TOL * knot_spacings[mid:]) init_biases = [gamma_init, slopes_init, knot_spacings_init] # check if it returns original parameters of mapped gamma_hat, slopes_hat, knot_spacings_hat = maximum_likelihood_estimate_sgd( PiecewiseLinearOutput(len(slopes)), samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.01), num_epochs=PositiveInt(20), ) # Since the problem is highly non-convex we may not be able to recover the exact parameters # Here we check if the estimated parameters yield similar function evaluations at different quantile levels. quantile_levels = np.arange(0.1, 1.0, 0.1) # create a LinearSplines instance with the estimated parameters to have access to .quantile pwl_sqf_hat = PiecewiseLinear( mx.nd.array(gamma_hat), mx.nd.array(slopes_hat).expand_dims(axis=0), mx.nd.array(knot_spacings_hat).expand_dims(axis=0), ) # Compute quantiles with the estimated parameters quantiles_hat = np.squeeze( pwl_sqf_hat.quantile(mx.nd.array(quantile_levels).expand_dims(axis=0), axis=1).asnumpy()) # Compute quantiles with the original parameters # Since params is replicated across samples we take only the first entry quantiles = np.squeeze( pwl_sqf.quantile( mx.nd.array(quantile_levels).expand_dims(axis=0).repeat( axis=0, repeats=num_samples), axis=1, ).asnumpy()[0, :]) for ix, (quantile, quantile_hat) in enumerate(zip(quantiles, quantiles_hat)): assert np.abs(quantile_hat - quantile) < TOL * quantile, ( f"quantile level {quantile_levels[ix]} didn't match:" f" " f"q = {quantile}, q_hat = {quantile_hat}")
def test_inflated_beta_likelihood( alpha: float, beta: float, hybridize: bool, inflated_at: str, zero_probability: float, one_probability: float, ) -> None: """ Test to check that maximizing the likelihood recovers the parameters """ # generate samples alphas = mx.nd.zeros((NUM_SAMPLES, )) + alpha betas = mx.nd.zeros((NUM_SAMPLES, )) + beta zero_probabilities = mx.nd.zeros((NUM_SAMPLES, )) + zero_probability one_probabilities = mx.nd.zeros((NUM_SAMPLES, )) + one_probability if inflated_at == "zero": distr = ZeroInflatedBeta(alphas, betas, zero_probability=zero_probabilities) distr_output = ZeroInflatedBetaOutput() elif inflated_at == "one": distr = OneInflatedBeta(alphas, betas, one_probability=one_probabilities) distr_output = OneInflatedBetaOutput() else: distr = ZeroAndOneInflatedBeta( alphas, betas, zero_probability=zero_probabilities, one_probability=one_probabilities, ) distr_output = ZeroAndOneInflatedBetaOutput() samples = distr.sample() init_biases = [ inv_softplus(alpha - START_TOL_MULTIPLE * TOL * alpha), inv_softplus(beta - START_TOL_MULTIPLE * TOL * beta), ] parameters = maximum_likelihood_estimate_sgd( distr_output, samples, init_biases=init_biases, hybridize=hybridize, learning_rate=PositiveFloat(0.05), num_epochs=PositiveInt(10), ) if inflated_at == "zero": alpha_hat, beta_hat, zero_probability_hat = parameters assert ( np.abs(zero_probability_hat[0] - zero_probability) < TOL * zero_probability ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}" elif inflated_at == "one": alpha_hat, beta_hat, one_probability_hat = parameters assert ( np.abs(one_probability_hat - one_probability) < TOL * one_probability ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}" else: ( alpha_hat, beta_hat, zero_probability_hat, one_probability_hat, ) = parameters assert ( np.abs(zero_probability_hat - zero_probability) < TOL * zero_probability ), f"zero_probability did not match: zero_probability = {zero_probability}, zero_probability_hat = {zero_probability_hat}" assert ( np.abs(one_probability_hat - one_probability) < TOL * one_probability ), f"one_probability did not match: one_probability = {one_probability}, one_probability_hat = {one_probability_hat}" assert (np.abs(alpha_hat - alpha) < TOL * alpha ), f"alpha did not match: alpha = {alpha}, alpha_hat = {alpha_hat}" assert (np.abs(beta_hat - beta) < TOL * beta), f"beta did not match: beta = {beta}, beta_hat = {beta_hat}"