Example #1
    def sample_params(self, n_samples=500):
        if 'mr_list' in dir(self.mr):
            # ensemble model: sample for each submodel
            sample_size_list = self.mr.compute_sample_sizes(n_samples)
            param_samples = [
                LimeTr.sampleSoln(sub_mr.lt, sample_size=ss)
                for sub_mr, ss in zip(self.mr.mr_list, sample_size_list)
            ]
            given_samples = {
                'given_beta_samples_list': [i[0] for i in param_samples],
                'given_gamma_samples_list': [i[1] for i in param_samples]
            }
        else:
            # single model: sample from its LimeTr object directly
            beta_samples, gamma_samples = LimeTr.sampleSoln(
                self.mr.lt, sample_size=n_samples)
            given_samples = {
                'given_beta_samples': beta_samples,
                'given_gamma_samples': gamma_samples
            }
        self.given_samples = given_samples
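The ensemble branch relies on `compute_sample_sizes`, whose body is not shown here. A minimal sketch of one plausible implementation, assuming the total is simply split evenly across the submodels in `mr_list` (the real method may weight submodels differently):

def compute_sample_sizes(mr, n_samples):
    # Hypothetical sketch only: split n_samples evenly across submodels,
    # handing the remainder to the first few. The actual compute_sample_sizes
    # implementation is not part of the snippet above.
    num_models = len(mr.mr_list)
    base, remainder = divmod(n_samples, num_models)
    return [base + (1 if i < remainder else 0) for i in range(num_models)]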
Example #2
from limetr import LimeTr


def get_parameter_samples(mr, n_samples=1000):
    # sample for each submodel
    sample_size_list = mr.compute_sample_sizes(n_samples)
    param_samples = [
        LimeTr.sampleSoln(sub_mr.lt, sample_size=ss)
        for sub_mr, ss in zip(mr.mr_list, sample_size_list)
    ]
    given_samples = {
        'given_beta_samples_list': [i[0] for i in param_samples],
        'given_gamma_samples_list': [i[1] for i in param_samples]
    }

    return given_samples
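A short usage sketch, assuming `mr` is a fitted ensemble model that exposes `mr_list` and `compute_sample_sizes` as the function above expects:

# mr is assumed to be a fitted ensemble model (not constructed here).
given_samples = get_parameter_samples(mr, n_samples=1000)
beta_samples_list = given_samples['given_beta_samples_list']
gamma_samples_list = given_samples['given_gamma_samples_list']
for beta_samples, gamma_samples in zip(beta_samples_list, gamma_samples_list):
    print(beta_samples.shape, gamma_samples.shape)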
Example #3
    def predictData(self,
                    pred_x_cov_list,
                    pred_z_cov_list,
                    sample_size,
                    pred_study_sizes=None,
                    given_beta_samples=None,
                    given_gamma_samples=None,
                    ref_point=None,
                    include_random_effect=True):
        # sample solutions
        if given_beta_samples is None or given_gamma_samples is None:
            beta_samples, gamma_samples = LimeTr.sampleSoln(
                self.lt, sample_size=sample_size)
        else:
            beta_samples = given_beta_samples
            gamma_samples = given_gamma_samples

        # calculate the beta and gamma post cov
        # self.beta_samples_mean = np.mean(beta_samples, axis=0)
        # self.gamma_samples_mean = np.mean(gamma_samples, axis=0)

        # self.beta_samples_cov = \
        #     beta_samples.T.dot(beta_samples)/sample_size - \
        #     np.outer(self.beta_samples_mean, self.beta_samples_mean)
        # self.gamma_samples_cov = \
        #     gamma_samples.T.dot(gamma_samples)/sample_size - \
        #     np.outer(self.gamma_samples_mean, self.gamma_samples_mean)

        # create x cov
        (pred_F, pred_JF, pred_F_list, pred_JF_list,
         pred_id_beta_list) = utils.constructPredXCov(pred_x_cov_list, self)

        # create z cov
        (pred_Z, pred_Z_list,
         pred_id_gamma_list) = utils.constructPredZCov(pred_z_cov_list, self)

        # num of studies
        pred_num_obs = pred_Z.shape[0]

        # create observation samples
        y_samples = np.vstack([pred_F(beta) for beta in beta_samples])

        if ref_point is not None:
            x_cov_spline_id = [
                x_cov['spline_id'] for x_cov in pred_x_cov_list
                if 'spline' in x_cov['cov_type']
            ]
            if len(x_cov_spline_id) == 0:
                raise Exception("Error: no spline x cov")
            if len(x_cov_spline_id) >= 2:
                raise Exception("Error: multiple spline x covs")

            spline = self.spline_list[x_cov_spline_id[0]]
            ref_risk = spline.designMat(np.array([ref_point])).dot(
                beta_samples[:,
                             self.id_spline_beta_list[x_cov_spline_id[0]]].T)

            y_samples /= ref_risk.reshape(sample_size, 1)

        pred_gamma = np.hstack([
            self.gamma_soln[pred_id_gamma_list[i]]
            for i in range(len(pred_id_gamma_list))
        ])

        if include_random_effect:
            if self.rr_random_slope:
                u = np.random.randn(sample_size, self.k_gamma)*\
                    np.sqrt(self.gamma_soln)
                # zu = np.sum(pred_Z*u, axis=1)
                zu = u[:, 0]

                valid_x_cov_id = [
                    i for i in range(len(pred_x_cov_list))
                    if pred_x_cov_list[i]['cov_type'] == 'spline'
                ]

                if len(valid_x_cov_id) == 0:
                    raise Exception(
                        "Error: no suitable x cov for random slope model.")
                if len(valid_x_cov_id) >= 2:
                    raise Exception(
                        "Error: multiple x cov for random slope model.")

                mat = pred_x_cov_list[valid_x_cov_id[0]]['mat']
                if ref_point is None:
                    y_samples *= np.exp(np.outer(zu, mat - mat[0]))
                else:
                    y_samples *= np.exp(np.outer(zu, mat - ref_point))
            else:
                if pred_study_sizes is None:
                    pred_study_sizes = np.array([1] * pred_num_obs)
                else:
                    assert sum(pred_study_sizes) == pred_num_obs

                pred_num_studies = len(pred_study_sizes)

                pred_Z_sub = np.split(pred_Z, np.cumsum(pred_study_sizes)[:-1])
                u = [
                    np.random.multivariate_normal(
                        np.zeros(pred_study_sizes[i]),
                        (pred_Z_sub[i] * pred_gamma).dot(pred_Z_sub[i].T),
                        sample_size) for i in range(pred_num_studies)
                ]
                U = np.hstack(u)

                if np.any([
                        'log_ratio' in self.x_cov_list[i]['cov_type']
                        for i in range(len(self.x_cov_list))
                ]):
                    y_samples *= np.exp(U)
                else:
                    y_samples += U

        return y_samples, beta_samples, gamma_samples, pred_F, pred_Z
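A hedged call sketch for `predictData`, reusing pre-drawn parameter samples so the method skips its own call to `LimeTr.sampleSoln`; `model`, `pred_x_cov_list`, and `pred_z_cov_list` are placeholders assumed to match the covariate format expected by `utils.constructPredXCov` and `utils.constructPredZCov`:

import numpy as np
from limetr import LimeTr

# model, pred_x_cov_list and pred_z_cov_list are assumed to exist already.
beta_samples, gamma_samples = LimeTr.sampleSoln(model.lt, sample_size=1000)
y_samples, *_ = model.predictData(pred_x_cov_list,
                                  pred_z_cov_list,
                                  sample_size=1000,
                                  given_beta_samples=beta_samples,
                                  given_gamma_samples=gamma_samples,
                                  include_random_effect=False)
# y_samples has shape (sample_size, number of prediction points)
lower, upper = np.percentile(y_samples, [2.5, 97.5], axis=0)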
Example #4
class MRBRT:
    """MR-BRT Object
    """
    def __init__(self,
                 data: MRData,
                 cov_models: List[CovModel],
                 inlier_pct: float = 1.0):
        """Constructor of MRBRT.

        Args:
            data (MRData): Data for meta-regression.
            cov_models (List[CovModel]): A list of covariates models.
            inlier_pct (float, optional):
                A float between 0 and 1 indicating the percentage of inliers.
        """
        self.data = data
        self.cov_models = cov_models
        self.inlier_pct = inlier_pct
        self.check_input()
        self.cov_model_names = [
            cov_model.name for cov_model in self.cov_models
        ]
        self.num_cov_models = len(self.cov_models)
        self.cov_names = []
        for cov_model in self.cov_models:
            self.cov_names.extend(cov_model.covs)
        self.num_covs = len(self.cov_names)

        # attach data to cov_model
        for cov_model in self.cov_models:
            cov_model.attach_data(self.data)

        # fixed effects size and index
        self.x_vars_sizes = [
            cov_model.num_x_vars for cov_model in self.cov_models
        ]
        self.x_vars_indices = utils.sizes_to_indices(self.x_vars_sizes)
        self.num_x_vars = sum(self.x_vars_sizes)

        # random effects size and index
        self.z_vars_sizes = [
            cov_model.num_z_vars for cov_model in self.cov_models
        ]
        self.z_vars_indices = utils.sizes_to_indices(self.z_vars_sizes)
        self.num_z_vars = sum(self.z_vars_sizes)

        self.num_vars = self.num_x_vars + self.num_z_vars

        # number of constraints
        self.num_constraints = sum(
            [cov_model.num_constraints for cov_model in self.cov_models])

        # number of regularizations
        self.num_regularizations = sum(
            [cov_model.num_regularizations for cov_model in self.cov_models])

        # place holder for the limetr objective
        self.lt = None
        self.beta_soln = None
        self.gamma_soln = None
        self.u_soln = None
        self.w_soln = None
        self.re_soln = None

    def check_input(self):
        """Check the input type of the attributes.
        """
        assert isinstance(self.data, MRData)
        assert isinstance(self.cov_models, list)
        assert all(
            [isinstance(cov_model, CovModel) for cov_model in self.cov_models])
        assert (self.inlier_pct >= 0.0) and (self.inlier_pct <= 1.0)

    def get_cov_model(self, name: str) -> CovModel:
        """Choose covariate model with name.
        """
        index = self.get_cov_model_index(name)
        return self.cov_models[index]

    def get_cov_model_index(self, name: str) -> int:
        """From cov_model name get the index.
        """
        matching_index = [
            index for index, cov_model_name in enumerate(self.cov_model_names)
            if cov_model_name == name
        ]
        num_matching_index = len(matching_index)
        assert num_matching_index == 1, f"Number of matching index is {num_matching_index}."
        return matching_index[0]

    def create_x_fun(self, data=None):
        """Create the fixed effects function, link with limetr.
        """
        data = self.data if data is None else data
        # create design functions
        design_funs = [
            cov_model.create_x_fun(data) for cov_model in self.cov_models
        ]
        funs, jac_funs = list(zip(*design_funs))

        def x_fun(beta, funs=funs):
            return sum(
                fun(beta[self.x_vars_indices[i]])
                for i, fun in enumerate(funs))

        def x_jac_fun(beta, jac_funs=jac_funs):
            return np.hstack([
                jac_fun(beta[self.x_vars_indices[i]])
                for i, jac_fun in enumerate(jac_funs)
            ])

        return x_fun, x_jac_fun

    def create_z_mat(self, data=None):
        """Create the random effects matrix, link with limetr.
        """
        data = self.data if data is None else data
        mat = np.hstack(
            [cov_model.create_z_mat(data) for cov_model in self.cov_models])

        return mat

    def create_c_mat(self):
        """Create the constraints matrices.
        """
        c_mat = np.zeros((0, self.num_vars))
        c_vec = np.zeros((2, 0))

        for i, cov_model in enumerate(self.cov_models):
            if cov_model.num_constraints != 0:
                c_mat_sub = np.zeros(
                    (cov_model.num_constraints, self.num_vars))
                c_mat_sub[:, self.x_vars_indices[
                    i]], c_vec_sub = cov_model.create_constraint_mat()
                c_mat = np.vstack((c_mat, c_mat_sub))
                c_vec = np.hstack((c_vec, c_vec_sub))

        return c_mat, c_vec

    def create_h_mat(self):
        """Create the regularizer matrices.
        """
        h_mat = np.zeros((0, self.num_vars))
        h_vec = np.zeros((2, 0))

        for i, cov_model in enumerate(self.cov_models):
            if cov_model.num_regularizations != 0:
                h_mat_sub = np.zeros(
                    (cov_model.num_regularizations, self.num_vars))
                h_mat_sub[:, self.x_vars_indices[
                    i]], h_vec_sub = cov_model.create_regularization_mat()
                h_mat = np.vstack((h_mat, h_mat_sub))
                h_vec = np.hstack((h_vec, h_vec_sub))

        return h_mat, h_vec

    def create_uprior(self):
        """Create direct uniform prior.
        """
        uprior = np.array([[-np.inf] * self.num_vars,
                           [np.inf] * self.num_vars])

        for i, cov_model in enumerate(self.cov_models):
            uprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_uniform
            uprior[:, self.z_vars_indices[i] +
                   self.num_x_vars] = cov_model.prior_gamma_uniform

        return uprior

    def create_gprior(self):
        """Create direct gaussian prior.
        """
        gprior = np.array([[0] * self.num_vars, [np.inf] * self.num_vars])

        for i, cov_model in enumerate(self.cov_models):
            gprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_gaussian
            gprior[:, self.z_vars_indices[i] +
                   self.num_x_vars] = cov_model.prior_gamma_gaussian

        return gprior

    def create_lprior(self):
        """Create direct laplace prior.
        """
        lprior = np.array([[0] * self.num_vars, [np.inf] * self.num_vars])

        for i, cov_model in enumerate(self.cov_models):
            lprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_laplace
            lprior[:, self.z_vars_indices[i] +
                   self.num_x_vars] = cov_model.prior_gamma_laplace

        return lprior

    def fit_model(self, **fit_options):
        """Fitting the model through limetr.

        Args:
            x0 (np.ndarray): Initial guess for the optimization problem.
            inner_print_level (int): If non-zero, print iteration information of the inner problem.
            inner_max_iter (int): Maximum number of inner iterations.
            inner_tol (float): Tolerance of the inner problem.
            outer_verbose (bool): If `True`, print out iteration information.
            outer_max_iter (int): Maximum number of outer iterations.
            outer_step_size (float): Step size of the outer problem.
            outer_tol (float): Tolerance of the outer problem.
            normalize_trimming_grad (bool): If `True`, normalize the gradient of the outer trimming problem.
        """
        # dimensions
        n = self.data.study_sizes
        k_beta = self.num_x_vars
        k_gamma = self.num_z_vars

        # data
        y = self.data.obs
        s = self.data.obs_se

        # create x fun and z mat
        x_fun, x_fun_jac = self.create_x_fun()
        z_mat = self.create_z_mat()
        # scale z_mat
        z_scale = np.max(np.abs(z_mat), axis=0)
        z_mat /= z_scale

        # priors
        c_mat, c_vec = self.create_c_mat()
        h_mat, h_vec = self.create_h_mat()
        c_fun, c_fun_jac = utils.mat_to_fun(c_mat)
        h_fun, h_fun_jac = utils.mat_to_fun(h_mat)

        uprior = self.create_uprior()
        uprior[:, self.num_x_vars:self.num_vars] *= z_scale**2
        gprior = self.create_gprior()
        gprior[:, self.num_x_vars:self.num_vars] *= z_scale**2
        lprior = self.create_lprior()
        lprior[:, self.num_x_vars:self.num_vars] *= z_scale**2

        if np.isneginf(uprior[0]).all() and np.isposinf(uprior[1]).all():
            uprior = None
        if np.isposinf(gprior[1]).all():
            gprior = None
        if np.isposinf(lprior[1]).all():
            lprior = None

        # create limetr object
        self.lt = LimeTr(n,
                         k_beta,
                         k_gamma,
                         y,
                         x_fun,
                         x_fun_jac,
                         z_mat,
                         S=s,
                         C=c_fun,
                         JC=c_fun_jac,
                         c=c_vec,
                         H=h_fun,
                         JH=h_fun_jac,
                         h=h_vec,
                         uprior=uprior,
                         gprior=gprior,
                         lprior=lprior,
                         inlier_percentage=self.inlier_pct)

        self.lt.fitModel(**fit_options)
        self.lt.Z *= z_scale
        if hasattr(self.lt, 'gprior'):
            self.lt.gprior[:, self.lt.idx_gamma] /= z_scale**2
        if hasattr(self.lt, 'uprior'):
            self.lt.uprior[:, self.lt.idx_gamma] /= z_scale**2
        if hasattr(self.lt, 'lprior'):
            self.lt.lprior[:, self.lt.idx_gamma] /= z_scale**2
        self.lt.gamma /= z_scale**2

        self.beta_soln = self.lt.beta.copy()
        self.gamma_soln = self.lt.gamma.copy()
        self.w_soln = self.lt.w.copy()
        self.u_soln = self.lt.estimateRE()
        self.re_soln = {
            study: self.u_soln[i]
            for i, study in enumerate(self.data.studies)
        }

    def extract_re(self, study_id: np.ndarray) -> np.ndarray:
        """Extract the random effect for a given dataset.
        """
        re = np.vstack([
            self.re_soln[study]
            if study in self.re_soln else np.zeros(self.num_z_vars)
            for study in study_id
        ])
        return re

    def predict(self,
                data: MRData,
                predict_for_study: bool = False,
                sort_by_data_id: bool = False) -> np.ndarray:
        """Create new prediction with existing solution.

        Args:
            data (MRData): MRData object contains the predict data.
            predict_for_study (bool, optional):
                If `True`, use the random effects information to predict for specific
                studies. If a `study_id` in `data` is not contained in the fitting data,
                the corresponding random effects are assumed to be 0.
            sort_by_data_id (bool, optional):
                If `True`, sort the final prediction in the order of the original
                data frame used to create `data`. Defaults to False.

        Returns:
            np.ndarray: Predicted outcome array.
        """
        assert data.has_covs(
            self.cov_names
        ), "Prediction data do not have covariates used for fitting."
        x_fun, _ = self.create_x_fun(data=data)
        prediction = x_fun(self.beta_soln)
        if predict_for_study:
            z_mat = self.create_z_mat(data=data)
            re = self.extract_re(data.study_id)
            prediction += np.sum(z_mat * re, axis=1)

        if sort_by_data_id:
            prediction = prediction[np.argsort(data.data_id)]

        return prediction

    def sample_soln(self,
                    sample_size: int = 1,
                    sim_prior: bool = True,
                    sim_re: bool = True,
                    print_level: int = 0) -> Tuple[np.ndarray, np.ndarray]:
        """Sample solutions.

        Args:
            sample_size (int, optional): Number of samples.
            sim_prior (bool, optional): If `True`, simulate priors.
            sim_re (bool, optional): If `True`, simulate random effects.
            print_level (int, optional):
                Level of detail of the optimization information printed out during the
                sampling process. If 0, no information will be printed out.

        Returns:
            Tuple[np.ndarray, np.ndarray]:
                Beta samples and gamma samples.
        """
        if self.lt is None:
            raise ValueError('Please fit the model first.')

        beta_soln_samples, gamma_soln_samples = \
            self.lt.sampleSoln(self.lt,
                               sample_size=sample_size,
                               sim_prior=sim_prior,
                               sim_re=sim_re,
                               print_level=print_level)

        return beta_soln_samples, gamma_soln_samples

    def create_draws(self,
                     data: MRData,
                     beta_samples: np.ndarray,
                     gamma_samples: np.ndarray,
                     random_study: bool = True,
                     sort_by_study_id: bool = False) -> np.ndarray:
        """Create draws for the given data set.

        Args:
            data (MRData): MRData object contains predict data.
            beta_samples (np.ndarray): Samples of beta.
            gamma_samples (np.ndarray): Samples of gamma.
            random_study (bool, optional):
                If `True`, the draws will include uncertainty from study heterogeneity.
            sort_by_study_id (bool, optional):
                If `True`, sort the final draws in the order of the original
                data frame used to create `data`. Defaults to False.

        Returns:
            np.ndarray: Returns outcome sample matrix.
        """
        sample_size = beta_samples.shape[0]
        assert beta_samples.shape == (sample_size, self.num_x_vars)
        assert gamma_samples.shape == (sample_size, self.num_z_vars)

        x_fun, x_jac_fun = self.create_x_fun(data=data)
        z_mat = self.create_z_mat(data=data)

        y_samples = np.vstack(
            [x_fun(beta_sample) for beta_sample in beta_samples])

        if random_study:
            u_samples = np.random.randn(
                sample_size, self.num_z_vars) * np.sqrt(gamma_samples)
            y_samples += u_samples.dot(z_mat.T)
        else:
            re = self.extract_re(data.study_id)
            y_samples += np.sum(z_mat * re, axis=1)

        if sort_by_study_id:
            y_samples = y_samples[:, np.argsort(data.data_id)]

        return y_samples.T
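A minimal end-to-end sketch of the workflow the class above defines, using only the methods shown; `data` (an MRData) and `cov_models` (a list of CovModel objects) are assumed to have been constructed elsewhere:

import numpy as np

# data and cov_models are assumed to exist; their construction is not shown here.
model = MRBRT(data, cov_models, inlier_pct=0.9)
model.fit_model(inner_max_iter=1000, outer_max_iter=100)

# draw parameter samples and turn them into outcome draws
beta_samples, gamma_samples = model.sample_soln(sample_size=1000)
draws = model.create_draws(data,
                           beta_samples=beta_samples,
                           gamma_samples=gamma_samples,
                           random_study=True)

# point prediction plus a draw-based 95% interval
point = model.predict(data, predict_for_study=False)
lower, upper = np.percentile(draws, [2.5, 97.5], axis=1)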
Example #5
    def sampleGlobalWithLimeTr(self, sample_size=100, max_iter=300):
        beta_samples, gamma_samples = LimeTr.sampleSoln(
            self.model, sample_size=sample_size, max_iter=max_iter)
        return beta_samples, gamma_samples
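Usage is then a single call; a brief sketch, assuming `global_model` is a fitted object that exposes the method above and holds its LimeTr instance in `.model`:

# global_model is assumed to be a fitted object exposing sampleGlobalWithLimeTr.
beta_samples, gamma_samples = global_model.sampleGlobalWithLimeTr(
    sample_size=500, max_iter=300)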