예제 #1
0
파일: cbo_lcem.py 프로젝트: tangzhenyu/ax
    def get_and_fit_model(
        self,
        Xs: List[Tensor],
        Ys: List[Tensor],
        Yvars: List[Tensor],
        task_features: List[int],
        fidelity_features: List[int],
        metric_names: List[str],
        state_dict: Optional[Dict[str, Tensor]] = None,
        fidelity_model_id: Optional[int] = None,
        **kwargs: Any,
    ) -> ModelListGP:
        """Get a fitted multi-task contextual GP model for each outcome.

        Args:
            Xs: List of X data, one tensor per outcome.
            Ys: List of Y data, one tensor per outcome.
            Yvars: List of noise variances of Y, one tensor per outcome. Each
                tensor is clamped in place to at least
                ``MIN_OBSERVED_NOISE_LEVEL``.
            task_features: Columns of X that are tasks. A single entry is
                shared by every outcome; if several entries are given they
                are matched to outcomes by position.
            fidelity_features: Not used by this method.
            metric_names: Not used by this method.
            state_dict: Not used by this method; the model is always fitted.
            fidelity_model_id: Not used by this method.
            **kwargs: Not used by this method.

        Returns:
            A fitted ModelListGP containing one LCEM GP per outcome.

        Raises:
            ValueError: If ``task_features`` is empty.
        """
        if len(task_features) == 0:
            raise ValueError("LCEMBO requires context input as task features")
        # A lone task column applies to every outcome. Looking it up by outcome
        # index would raise IndexError as soon as there is a second outcome.
        shared_task_feature = task_features[0] if len(task_features) == 1 else None

        models = []
        for i, X in enumerate(Xs):
            if shared_task_feature is None:
                task_feature = task_features[i]
            else:
                task_feature = shared_task_feature
            # validate input Yvars (the NaN check runs on the clamped tensor)
            Yvar = Yvars[i].clamp_min_(MIN_OBSERVED_NOISE_LEVEL)
            all_nan_Yvar = torch.all(torch.isnan(Yvar))
            if all_nan_Yvar:
                # No observed noise at all: use the model without train_Yvar.
                gp_m = LCEMGP(
                    train_X=X,
                    train_Y=Ys[i],
                    task_feature=task_feature,
                    context_cat_feature=self.context_cat_feature,
                    context_emb_feature=self.context_emb_feature,
                    embs_dim_list=self.embs_dim_list,
                )
            else:
                gp_m = FixedNoiseLCEMGP(
                    train_X=X,
                    train_Y=Ys[i],
                    train_Yvar=Yvar,
                    task_feature=task_feature,
                    context_cat_feature=self.context_cat_feature,
                    context_emb_feature=self.context_emb_feature,
                    embs_dim_list=self.embs_dim_list,
                )
            models.append(gp_m)
        # Use a ModelListGP
        model = ModelListGP(*models)
        model.to(Xs[0])
        mll = SumMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll)
        return model
예제 #2
0
def _get_single_task_gpytorch_model(
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    task_features: List[int],
    fidelity_features: List[int],
    state_dict: Optional[Dict[str, Tensor]] = None,
    num_samples: int = 512,
    thinning: int = 16,
    use_input_warping: bool = False,
    gp_kernel: str = "matern",
    **kwargs: Any,
) -> ModelListGP:
    r"""Instantiates a batched GPyTorchModel(ModelListGP) based on the given data.
    The model fitting is based on MCMC and is run separately using pyro. The MCMC
    samples will be loaded into the model instantiated here afterwards.

    Returns:
        A ModelListGP.
    """
    if len(task_features) > 0:
        raise NotImplementedError(
            "Currently do not support MT-GP models with MCMC!")
    if len(fidelity_features) > 0:
        raise NotImplementedError(
            "Fidelity MF-GP models are not currently supported with MCMC!")

    num_mcmc_samples = num_samples // thinning
    covar_modules = [
        _get_rbf_kernel(num_samples=num_mcmc_samples, dim=Xs[0].shape[-1])
        if gp_kernel == "rbf" else None for _ in range(len(Xs))
    ]

    models = [
        _get_model(
            X=X.unsqueeze(0).expand(num_mcmc_samples, X.shape[0], -1),
            Y=Y.unsqueeze(0).expand(num_mcmc_samples, Y.shape[0], -1),
            Yvar=Yvar.unsqueeze(0).expand(num_mcmc_samples, Yvar.shape[0], -1),
            fidelity_features=fidelity_features,
            use_input_warping=use_input_warping,
            covar_module=covar_module,
            **kwargs,
        ) for X, Y, Yvar, covar_module in zip(Xs, Ys, Yvars, covar_modules)
    ]
    model = ModelListGP(*models)
    model.to(Xs[0])
    return model
예제 #3
0
파일: cbo_lcea.py 프로젝트: proteanblank/Ax
    def get_and_fit_model(
        self,
        Xs: List[Tensor],
        Ys: List[Tensor],
        Yvars: List[Tensor],
        task_features: List[int],
        fidelity_features: List[int],
        metric_names: List[str],
        state_dict: Optional[Dict[str, Tensor]] = None,
        fidelity_model_id: Optional[int] = None,
        **kwargs: Any,
    ) -> GPyTorchModel:
        """Build one LCEAGP model per outcome via ``get_map_model``.

        Args:
            Xs: X for each outcome.
            Ys: Y for each outcome.
            Yvars: Noise variance of Y for each outcome. Each tensor is
                clamped in place to at least ``MIN_OBSERVED_NOISE_LEVEL``.
            task_features: Not used by this method.
            fidelity_features: Not used by this method.
            metric_names: Not used by this method.
            state_dict: Not used by this method.
            fidelity_model_id: Not used by this method.
            **kwargs: Not used by this method.

        Returns:
            The single model when there is exactly one outcome, otherwise a
            ModelListGP wrapping all per-outcome models.
        """
        # Decomposition structure shared by all per-outcome models.
        space_decomposition = generate_model_space_decomposition(
            decomposition=self.decomposition, feature_names=self.feature_names
        )

        per_outcome = []
        for idx, train_X in enumerate(Xs):
            noise = Yvars[idx].clamp_min_(MIN_OBSERVED_NOISE_LEVEL)
            # Only the first element of the returned pair is kept.
            outcome_model, _ = get_map_model(
                train_X=train_X,
                train_Y=Ys[idx],
                train_Yvar=noise,
                decomposition=space_decomposition,
                train_embedding=self.train_embedding,
                cat_feature_dict=self.cat_feature_dict,
                embs_feature_dict=self.embs_feature_dict,
                embs_dim_list=self.embs_dim_list,
                context_weight_dict=self.context_weight_dict,
            )
            per_outcome.append(outcome_model)

        model = per_outcome[0] if len(per_outcome) == 1 else ModelListGP(*per_outcome)
        model.to(Xs[0])
        return model
예제 #4
0
    def get_and_fit_model(
        self,
        Xs: List[Tensor],
        Ys: List[Tensor],
        Yvars: List[Tensor],
        state_dicts: Optional[List[MutableMapping[str, Tensor]]] = None,
    ) -> GPyTorchModel:
        """Fit one ALEBO model per outcome.

        Args:
            Xs: X for each outcome, already projected down.
            Ys: Y for each outcome.
            Yvars: Noise variance of Y for each outcome. Each tensor is
                clamped in place to at least 1e-7.
            state_dicts: State dicts to initialize model fitting. When given,
                fitting is treated as warm-started and uses a single restart;
                otherwise ``self.fit_restarts`` restarts are used.

        Returns:
            The single fitted model when there is exactly one outcome,
            otherwise a ModelListGP wrapping all fitted models.
        """
        if state_dicts is not None:
            n_restarts = 1  # Warm-started
        else:
            state_dicts = [None] * len(Xs)
            n_restarts = self.fit_restarts

        # All noise tensors are clamped before any model is fitted.
        Yvars = [noise.clamp_min_(1e-7) for noise in Yvars]

        fitted = []
        for idx, train_X in enumerate(Xs):
            fitted.append(
                get_fitted_model(
                    B=self.B,
                    train_X=train_X,
                    train_Y=Ys[idx],
                    train_Yvar=Yvars[idx],
                    restarts=n_restarts,
                    nsamp=self.laplace_nsamp,
                    # pyre-fixme[6]: Expected `Optional[Dict[str, Tensor]]` for 7th
                    #  param but got `Optional[MutableMapping[str, Tensor]]`.
                    init_state_dict=state_dicts[idx],
                )
            )

        model = fitted[0] if len(fitted) == 1 else ModelListGP(*fitted)
        model.to(Xs[0])
        return model
예제 #5
0
파일: cbo_sac.py 프로젝트: proteanblank/Ax
    def get_and_fit_model(
        self,
        Xs: List[Tensor],
        Ys: List[Tensor],
        Yvars: List[Tensor],
        task_features: List[int],
        fidelity_features: List[int],
        metric_names: List[str],
        state_dict: Optional[Dict[str, Tensor]] = None,
        fidelity_model_id: Optional[int] = None,
        **kwargs: Any,
    ) -> GPyTorchModel:
        """Fit one StructuralAdditiveContextualGP (SACGP) per outcome.

        Args:
            Xs: X for each outcome.
            Ys: Y for each outcome.
            Yvars: Noise variance of Y for each outcome. Each tensor is
                clamped in place to at least ``MIN_OBSERVED_NOISE_LEVEL``.
            task_features: Not used by this method.
            fidelity_features: Not used by this method.
            metric_names: Not used by this method.
            state_dict: Not used by this method.
            fidelity_model_id: Not used by this method.
            **kwargs: Not used by this method.

        Returns:
            The single fitted model when there is exactly one outcome,
            otherwise a ModelListGP wrapping all fitted models.
        """
        # Decomposition structure shared by all per-outcome models.
        space_decomposition = generate_model_space_decomposition(
            decomposition=self.decomposition, feature_names=self.feature_names
        )

        fitted = []
        for idx, train_X in enumerate(Xs):
            noise = Yvars[idx].clamp_min_(MIN_OBSERVED_NOISE_LEVEL)
            gp = SACGP(train_X, Ys[idx], noise, space_decomposition)
            # Each outcome model is fitted on its own before being collected.
            fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))
            fitted.append(gp)

        model = fitted[0] if len(fitted) == 1 else ModelListGP(*fitted)
        model.to(Xs[0])
        return model
예제 #6
0
def get_and_fit_model(
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    task_features: List[int],
    state_dict: Optional[Dict[str, Tensor]] = None,
    **kwargs: Any,
) -> GPyTorchModel:
    r"""Build a GP model for the given data and fit it or load its state.

    A single model is used when there is no task feature and either only one
    outcome exists or all outcomes share identical X; in every other case one
    model per outcome is wrapped in a ModelListGP.

    Args:
        Xs: List of X data, one tensor per outcome
        Ys: List of Y data, one tensor per outcome
        Yvars: List of observed variance of Ys.
        task_features: List of columns of X that are tasks (at most one).
        state_dict: If provided, model parameters are loaded from this state
            dictionary and no fitting is done. Otherwise the model is fitted.
        **kwargs: Not used by this function.

    Returns:
        The fitted (or state-loaded) model.

    Raises:
        ValueError: If more than one task feature is given.
    """
    n_task_features = len(task_features)
    if n_task_features > 1:
        raise ValueError(
            f"This model only supports 1 task feature (got {task_features})"
        )
    task_feature = task_features[0] if n_task_features == 1 else None

    model = None
    if task_feature is None:
        if len(Xs) == 1:
            # Single output, single task GP
            model = _get_model(
                X=Xs[0], Y=Ys[0], Yvar=Yvars[0], task_feature=task_feature
            )
        elif all(torch.equal(Xs[0], other_X) for other_X in Xs[1:]):
            # Batched multioutput, single task GP: stack outcomes on the last dim
            stacked_Y = torch.cat(Ys, dim=-1)
            stacked_Yvar = torch.cat(Yvars, dim=-1)
            model = _get_model(
                X=Xs[0], Y=stacked_Y, Yvar=stacked_Yvar, task_feature=task_feature
            )

    if model is None:
        # Fall back to one model per outcome
        per_outcome = []
        for X, Y, Yvar in zip(Xs, Ys, Yvars):
            per_outcome.append(
                _get_model(X=X, Y=Y, Yvar=Yvar, task_feature=task_feature)
            )
        model = ModelListGP(gp_models=per_outcome)

    model.to(dtype=Xs[0].dtype, device=Xs[0].device)  # pyre-ignore

    if state_dict is not None:
        model.load_state_dict(state_dict)
        return model

    # TODO: Add bounds for optimization stability - requires revamp upstream
    bounds = {}
    if isinstance(model, ModelListGP):
        mll = SumMarginalLogLikelihood(model.likelihood, model)
    else:
        # pyre-ignore: [16]
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll, bounds=bounds)
    return model
예제 #7
0
def get_and_fit_model(
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    task_features: List[int],
    fidelity_features: List[int],
    state_dict: Optional[Dict[str, Tensor]] = None,
    refit_model: bool = True,
    **kwargs: Any,
) -> GPyTorchModel:
    r"""Instantiates and fits a botorch GPyTorchModel using the given data.

    A single model is used when there is no task feature and either only one
    outcome exists or all outcomes share identical X; in every other case one
    model per outcome is wrapped in a ModelListGP.

    Args:
        Xs: List of X data, one tensor per outcome
        Ys: List of Y data, one tensor per outcome
        Yvars: List of observed variance of Ys.
        task_features: List of columns of X that are tasks (at most one).
        fidelity_features: List of columns of X that are fidelity parameters
            (at most one, and not together with a task feature).
        state_dict: If provided, will set model parameters to this state
            dictionary. Otherwise, will fit the model.
        refit_model: When ``state_dict`` is provided, the model is fitted
            again after loading it only if this flag is True.
        **kwargs: Forwarded to ``_get_model``.

    Returns:
        A fitted GPyTorchModel.

    Raises:
        NotImplementedError: If fidelity and task features are combined, or
            if more than one fidelity feature or task feature is given.
    """
    if len(fidelity_features) > 0 and len(task_features) > 0:
        raise NotImplementedError(
            "Currently do not support MF-GP models with task_features!"
        )
    if len(fidelity_features) > 1:
        raise NotImplementedError(
            "Fidelity MF-GP models currently support only a single fidelity parameter!"
        )
    if len(task_features) > 1:
        raise NotImplementedError(
            f"This model only supports 1 task feature (got {task_features})"
        )
    elif len(task_features) == 1:
        task_feature = task_features[0]
    else:
        task_feature = None
    model = None
    if task_feature is None:
        if len(Xs) == 1:
            # Use single output, single task GP
            model = _get_model(
                X=Xs[0],
                Y=Ys[0],
                Yvar=Yvars[0],
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                **kwargs,
            )
        elif all(torch.equal(Xs[0], X) for X in Xs[1:]):
            # Use batched multioutput, single task GP
            Y = torch.cat(Ys, dim=-1)
            Yvar = torch.cat(Yvars, dim=-1)
            model = _get_model(
                X=Xs[0],
                Y=Y,
                Yvar=Yvar,
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                **kwargs,
            )
    if model is None:
        # Use a ModelListGP. fidelity_features is forwarded here as well so that
        # outcomes with differing Xs do not silently lose the fidelity column
        # that the single/batched branches above pass on. When a task feature is
        # set the guard at the top guarantees this list is empty.
        models = [
            _get_model(
                X=X,
                Y=Y,
                Yvar=Yvar,
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                **kwargs,
            )
            for X, Y, Yvar in zip(Xs, Ys, Yvars)
        ]
        model = ModelListGP(*models)
    model.to(Xs[0])
    if state_dict is not None:
        model.load_state_dict(state_dict)
    if state_dict is None or refit_model:
        # TODO: Add bounds for optimization stability - requires revamp upstream
        bounds = {}
        if isinstance(model, ModelListGP):
            mll = SumMarginalLogLikelihood(model.likelihood, model)
        else:
            # pyre-ignore: [16]
            mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll, bounds=bounds)
    return model
예제 #8
0
def get_and_fit_model(
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    task_features: List[int],
    fidelity_features: List[int],
    metric_names: List[str],
    state_dict: Optional[Dict[str, Tensor]] = None,
    refit_model: bool = True,
    **kwargs: Any,
) -> GPyTorchModel:
    r"""Instantiates and fits a botorch GPyTorchModel using the given data.
    N.B. Currently, the logic for choosing ModelListGP vs other models is handled
    using the if-else statements in the body below. In the future, this logic
    should be taken care of by modular botorch.

    Args:
        Xs: List of X data, one tensor per outcome.
        Ys: List of Y data, one tensor per outcome.
        Yvars: List of observed variance of Ys.
        task_features: List of columns of X that are tasks (at most one).
        fidelity_features: List of columns of X that are fidelity parameters
            (at most one, and not together with a task feature).
        metric_names: Names of each outcome Y in Ys.
        state_dict: If provided, will set model parameters to this state
            dictionary. Otherwise, will fit the model.
        refit_model: When ``state_dict`` is provided, the model is fitted
            again after loading it only if this flag is True.
        **kwargs: Forwarded to ``_get_model``. In the ModelListGP branch the
            key ``"multitask_gp_ranks"`` is removed first and read as a
            mapping from metric name to the ``rank`` passed for that metric.

    Returns:
        A fitted GPyTorchModel.

    Raises:
        NotImplementedError: If fidelity and task features are combined, or
            if more than one fidelity feature or task feature is given.
        ValueError: In the ModelListGP branch, if Xs, Ys, Yvars and
            metric_names do not all have the same length.
    """

    if len(fidelity_features) > 0 and len(task_features) > 0:
        raise NotImplementedError(
            "Currently do not support MF-GP models with task_features!")
    if len(fidelity_features) > 1:
        raise NotImplementedError(
            "Fidelity MF-GP models currently support only a single fidelity parameter!"
        )
    if len(task_features) > 1:
        raise NotImplementedError(
            f"This model only supports 1 task feature (got {task_features})")
    elif len(task_features) == 1:
        task_feature = task_features[0]
    else:
        task_feature = None
    model = None

    # TODO: Better logic for deciding when to use a ModelListGP. Currently the
    # logic is unclear. The two cases in which ModelListGP is used are
    # (i) the training inputs (Xs) are not the same for the different outcomes, and
    # (ii) a multi-task model is used

    if task_feature is None:
        if len(Xs) == 1:
            # Use single output, single task GP
            model = _get_model(
                X=Xs[0],
                Y=Ys[0],
                Yvar=Yvars[0],
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                **kwargs,
            )
        elif all(torch.equal(Xs[0], X) for X in Xs[1:]):
            # Use batched multioutput, single task GP
            Y = torch.cat(Ys, dim=-1)
            Yvar = torch.cat(Yvars, dim=-1)
            model = _get_model(
                X=Xs[0],
                Y=Y,
                Yvar=Yvar,
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                **kwargs,
            )
    # TODO: Is this equivalent an "else:" here?

    if model is None:  # use a ModelListGP (multi-task GP or differing Xs)
        mtgp_rank_dict = kwargs.pop("multitask_gp_ranks", {})
        # assembles list of ranks associated with each metric
        if len({len(Xs), len(Ys), len(Yvars), len(metric_names)}) > 1:
            raise ValueError(
                "Lengths of Xs, Ys, Yvars, and metric_names must match. Your "
                f"inputs have lengths {len(Xs)}, {len(Ys)}, {len(Yvars)}, and "
                f"{len(metric_names)}, respectively.")
        mtgp_rank_list = [
            mtgp_rank_dict.get(metric, None) for metric in metric_names
        ]
        # fidelity_features is forwarded here as well so that outcomes with
        # differing Xs do not silently lose the fidelity column that the
        # single/batched branches above pass on. When a task feature is set the
        # guard at the top guarantees this list is empty.
        models = [
            _get_model(X=X,
                       Y=Y,
                       Yvar=Yvar,
                       task_feature=task_feature,
                       fidelity_features=fidelity_features,
                       rank=mtgp_rank,
                       **kwargs)
            for X, Y, Yvar, mtgp_rank in zip(Xs, Ys, Yvars, mtgp_rank_list)
        ]
        model = ModelListGP(*models)
    model.to(Xs[0])
    if state_dict is not None:
        model.load_state_dict(state_dict)
    if state_dict is None or refit_model:
        # TODO: Add bounds for optimization stability - requires revamp upstream
        bounds = {}
        if isinstance(model, ModelListGP):
            mll = SumMarginalLogLikelihood(model.likelihood, model)
        else:
            # pyre-ignore: [16]
            mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll, bounds=bounds)
    return model
예제 #9
0
파일: cbo_lcem.py 프로젝트: viotemp1/Ax
    def get_and_fit_model(
        self,
        Xs: List[Tensor],
        Ys: List[Tensor],
        Yvars: List[Tensor],
        task_features: List[int],
        fidelity_features: List[int],
        metric_names: List[str],
        state_dict: Optional[Dict[str, Tensor]] = None,
        fidelity_model_id: Optional[int] = None,
        **kwargs: Any,
    ) -> ModelListGP:
        """Fit one multi-task contextual (LCEM) GP per outcome.

        Args:
            Xs: List of X data, one tensor per outcome.
            Ys: List of Y data, one tensor per outcome.
            Yvars: List of noise variances of Y, one tensor per outcome. Each
                tensor is clamped in place to at least
                ``MIN_OBSERVED_NOISE_LEVEL``.
            task_features: List of columns of X that are tasks; exactly one
                entry is required.
            fidelity_features: Not used by this method.
            metric_names: Not used by this method.
            state_dict: Not used by this method; the model is always fitted.
            fidelity_model_id: Not used by this method.
            **kwargs: Not used by this method.

        Returns:
            A fitted ModelListGP in which each sub-model is an LCEM GP.

        Raises:
            NotImplementedError: If more than one task feature is given.
            ValueError: If no task feature is given.
        """
        n_task_features = len(task_features)
        if n_task_features > 1:
            raise NotImplementedError(
                f"LCEMBO only supports 1 task feature (got {task_features})"
            )
        if n_task_features != 1:
            raise ValueError("LCEMBO requires context input as task features")
        task_feature = task_features[0]

        per_outcome = []
        for idx, train_X in enumerate(Xs):
            # The NaN check runs on the clamped noise tensor.
            noise = Yvars[idx].clamp_min_(MIN_OBSERVED_NOISE_LEVEL)
            common = {
                "train_X": train_X,
                "train_Y": Ys[idx],
                "task_feature": task_feature,
                "context_cat_feature": self.context_cat_feature,
                "context_emb_feature": self.context_emb_feature,
                "embs_dim_list": self.embs_dim_list,
            }
            if torch.all(torch.isnan(noise)):
                # No observed noise at all: use the model without train_Yvar.
                gp = LCEMGP(**common)
            else:
                gp = FixedNoiseLCEMGP(train_Yvar=noise, **common)
            per_outcome.append(gp)

        # Always wrapped in a ModelListGP, even for a single outcome.
        model = ModelListGP(*per_outcome)
        model.to(Xs[0])
        fit_gpytorch_model(SumMarginalLogLikelihood(model.likelihood, model))
        return model
예제 #10
0
def get_and_fit_model_mcmc(
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    task_features: List[int],
    fidelity_features: List[int],
    metric_names: List[str],
    state_dict: Optional[Dict[str, Tensor]] = None,
    refit_model: bool = True,
    use_input_warping: bool = False,
    use_loocv_pseudo_likelihood: bool = False,
    num_samples: int = 512,
    warmup_steps: int = 1024,
    thinning: int = 16,
    max_tree_depth: int = 6,
    use_saas: bool = False,
    disable_progbar: bool = False,
    **kwargs: Any,
) -> GPyTorchModel:
    r"""Build batched GP model(s) and fill their hyperparameters from MCMC.

    Training tensors get a leading batch dimension of size
    ``num_samples // thinning``. Unless a ``state_dict`` is given together
    with ``refit_model=False``, ``run_inference`` is called once per outcome
    and the returned samples are written into that outcome's model.

    Args:
        Xs: List of X data, one tensor per outcome.
        Ys: List of Y data, one tensor per outcome.
        Yvars: List of observed variance of Ys.
        task_features: Must be empty; multi-task models are rejected.
        fidelity_features: Must be empty; multi-fidelity models are rejected.
        metric_names: Not used by this function.
        state_dict: If provided, loaded into the model before any refitting.
        refit_model: When ``state_dict`` is provided, inference is run only
            if this flag is True.
        use_input_warping: Forwarded to ``_get_model`` and ``run_inference``.
        use_loocv_pseudo_likelihood: Not used by this function.
        num_samples: Forwarded to ``run_inference``; also sets the batch size.
        warmup_steps: Forwarded to ``run_inference``.
        thinning: Forwarded to ``run_inference``; also sets the batch size.
        max_tree_depth: Forwarded to ``run_inference``.
        use_saas: Forwarded to ``run_inference``.
        disable_progbar: Forwarded to ``run_inference``.
        **kwargs: Forwarded to ``_get_model``.

    Returns:
        A single model for one outcome, otherwise a ModelListGP.

    Raises:
        NotImplementedError: If task or fidelity features are supplied.
    """
    if len(task_features) > 0:
        raise NotImplementedError("Currently do not support MT-GP models with MCMC!")
    if len(fidelity_features) > 0:
        raise NotImplementedError(
            "Fidelity MF-GP models are not currently supported with MCMC!"
        )
    # TODO: Better logic for deciding when to use a ModelListGP. Currently the
    # logic is unclear. The two cases in which ModelListGP is used are
    # (i) the training inputs (Xs) are not the same for the different outcomes, and
    # (ii) a multi-task model is used

    num_mcmc_samples = num_samples // thinning

    def _batched_copy(t: Tensor) -> Tensor:
        # Prepend the MCMC batch dimension and materialize the expanded view.
        return t.unsqueeze(0).expand(num_mcmc_samples, t.shape[0], -1).clone()

    if len(Xs) == 1:
        # Use single output, single task GP. All three tensors are expanded
        # with the row count of X and are NOT cloned in this branch.
        n_rows = Xs[0].shape[0]
        model = _get_model(
            X=Xs[0].unsqueeze(0).expand(num_mcmc_samples, n_rows, -1),
            Y=Ys[0].unsqueeze(0).expand(num_mcmc_samples, n_rows, -1),
            Yvar=Yvars[0].unsqueeze(0).expand(num_mcmc_samples, n_rows, -1),
            fidelity_features=fidelity_features,
            use_input_warping=use_input_warping,
            **kwargs,
        )
    else:
        per_outcome = []
        for X, Y, Yvar in zip(Xs, Ys, Yvars):
            per_outcome.append(
                _get_model(
                    X=_batched_copy(X),
                    Y=_batched_copy(Y),
                    Yvar=_batched_copy(Yvar),
                    use_input_warping=use_input_warping,
                    **kwargs,
                )
            )
        model = ModelListGP(*per_outcome)
    model.to(Xs[0])

    models = model.models if isinstance(model, ModelListGP) else [model]

    if state_dict is not None:
        # pyre-fixme[6]: Expected `OrderedDict[typing.Any, typing.Any]` for 1st
        #  param but got `Dict[str, Tensor]`.
        model.load_state_dict(state_dict)

    if not (state_dict is None or refit_model):
        return model

    for X, Y, Yvar, m in zip(Xs, Ys, Yvars, models):
        samples = run_inference(
            pyro_model=pyro_model,  # pyre-ignore [6]
            X=X,
            Y=Y,
            Yvar=Yvar,
            num_samples=num_samples,
            warmup_steps=warmup_steps,
            thinning=thinning,
            use_input_warping=use_input_warping,
            use_saas=use_saas,
            max_tree_depth=max_tree_depth,
            disable_progbar=disable_progbar,
        )
        # Each sample tensor is detached, copied and reshaped to the shape of
        # the parameter it replaces.
        if "noise" in samples:
            noise = samples["noise"].detach().clone()
            noise = noise.view(m.likelihood.noise_covar.noise.shape)
            m.likelihood.noise_covar.noise = noise.clamp_min(MIN_INFERRED_NOISE_LEVEL)

        lengthscale = samples["lengthscale"].detach().clone()
        m.covar_module.base_kernel.lengthscale = lengthscale.view(
            m.covar_module.base_kernel.lengthscale.shape
        )

        outputscale = samples["outputscale"].detach().clone()
        m.covar_module.outputscale = outputscale.view(m.covar_module.outputscale.shape)

        mean = samples["mean"].detach().clone()
        m.mean_module.constant.data = mean.view(m.mean_module.constant.shape)

        if "c0" in samples:
            # Input-warping concentrations; "c1" is read whenever "c0" exists.
            c0 = samples["c0"].detach().clone()
            m.input_transform._set_concentration(
                i=0, value=c0.view(m.input_transform.concentration0.shape)
            )
            c1 = samples["c1"].detach().clone()
            m.input_transform._set_concentration(
                i=1, value=c1.view(m.input_transform.concentration1.shape)
            )
    return model
예제 #11
0
def get_and_fit_model(
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    task_features: List[int],
    fidelity_features: List[int],
    state_dict: Optional[Dict[str, Tensor]] = None,
    fidelity_model_id: Optional[int] = None,
    **kwargs: Any,
) -> GPyTorchModel:
    r"""Instantiates and fits a botorch GPyTorchModel using the given data.

    A single model is used when there is no task feature and either only one
    outcome exists or all outcomes share identical X; in every other case one
    model per outcome is wrapped in a ModelListGP.

    Args:
        Xs: List of X data, one tensor per outcome
        Ys: List of Y data, one tensor per outcome
        Yvars: List of observed variance of Ys.
        task_features: List of columns of X that are tasks (at most one).
        fidelity_features: List of columns of X that are fidelity parameters.
        state_dict: If provided, will set model parameters to this state
            dictionary and skip fitting. Otherwise, will fit the model.
        fidelity_model_id: set this if you want to use GP models from `model_list`
            defined above. The `SingleTaskGPLTKernel` model uses linear truncated
            kernel; the `SingleTaskMultiFidelityGP` model uses exponential decay
            kernel.
        **kwargs: Not used by this function.

    Returns:
        A fitted (or state-loaded) GPyTorchModel.

    Raises:
        NotImplementedError: If ``fidelity_model_id`` is set together with
            task features.
        UnsupportedError: If ``fidelity_model_id`` is set and more than one
            fidelity feature is given.
        ValueError: If more than one task feature is given.
    """
    if fidelity_model_id is not None and len(task_features) > 0:
        raise NotImplementedError(
            "Currently do not support MF-GP models with task_features!")
    if fidelity_model_id is not None and len(fidelity_features) > 1:
        raise UnsupportedError(
            "Fidelity MF-GP models currently support only one fidelity parameter!"
        )
    model = None
    if len(task_features) > 1:
        raise ValueError(
            f"This model only supports 1 task feature (got {task_features})")
    elif len(task_features) == 1:
        task_feature = task_features[0]
    else:
        task_feature = None
    if task_feature is None:
        if len(Xs) == 1:
            # Use single output, single task GP
            model = _get_model(
                X=Xs[0],
                Y=Ys[0],
                Yvar=Yvars[0],
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                fidelity_model_id=fidelity_model_id,
            )
        elif all(torch.equal(Xs[0], X) for X in Xs[1:]):
            # Use batched multioutput, single task GP
            Y = torch.cat(Ys, dim=-1)
            Yvar = torch.cat(Yvars, dim=-1)
            model = _get_model(
                X=Xs[0],
                Y=Y,
                Yvar=Yvar,
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                fidelity_model_id=fidelity_model_id,
            )
    if model is None:
        # Use model list. The fidelity settings are forwarded here as well so
        # that outcomes with differing Xs are built like the single/batched
        # models above instead of silently dropping the fidelity configuration.
        models = [
            _get_model(
                X=X,
                Y=Y,
                Yvar=Yvar,
                task_feature=task_feature,
                fidelity_features=fidelity_features,
                fidelity_model_id=fidelity_model_id,
            )
            for X, Y, Yvar in zip(Xs, Ys, Yvars)
        ]
        model = ModelListGP(*models)
    model.to(dtype=Xs[0].dtype, device=Xs[0].device)  # pyre-ignore
    if state_dict is None:
        # TODO: Add bounds for optimization stability - requires revamp upstream
        bounds = {}
        if isinstance(model, ModelListGP):
            mll = SumMarginalLogLikelihood(model.likelihood, model)
        else:
            # pyre-ignore: [16]
            mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll, bounds=bounds)
    else:
        model.load_state_dict(state_dict)
    return model