def sample_params(self, n_samples=500):
    """Draw parameter samples from the fitted model and cache them.

    The result is stored on ``self.given_samples`` as a dictionary; nothing
    is returned.

    * If ``self.mr`` exposes an ``mr_list`` attribute, one batch of samples
      is drawn per sub-model, with the per-sub-model sample counts obtained
      from ``self.mr.compute_sample_sizes(n_samples)``.  The dictionary then
      has the keys ``'given_beta_samples_list'`` and
      ``'given_gamma_samples_list'``.
    * Otherwise ``n_samples`` samples are drawn from ``self.mr.lt`` and the
      dictionary has the keys ``'given_beta_samples'`` and
      ``'given_gamma_samples'``.

    Args:
        n_samples (int, optional): Requested number of samples.
            Default to 500.
    """
    # ``hasattr`` is the supported way to probe for an attribute; ``dir()``
    # is an interactive convenience whose listing is not guaranteed to be
    # complete and is needlessly expensive for a membership test.
    if hasattr(self.mr, 'mr_list'):
        # sample for each submodel
        sample_size_list = self.mr.compute_sample_sizes(n_samples)
        param_samples = [
            LimeTr.sampleSoln(sub_mr.lt, sample_size=ss)
            for sub_mr, ss in zip(self.mr.mr_list, sample_size_list)
        ]
        given_samples = {
            'given_beta_samples_list': [
                sample[0] for sample in param_samples
            ],
            'given_gamma_samples_list': [
                sample[1] for sample in param_samples
            ],
        }
    else:
        beta_samples, gamma_samples = LimeTr.sampleSoln(
            self.mr.lt, sample_size=n_samples)
        given_samples = {
            'given_beta_samples': beta_samples,
            'given_gamma_samples': gamma_samples,
        }

    self.given_samples = given_samples
def get_parameter_samples(mr, n_samples=1000):
    """Collect LimeTr solution samples for every sub-model of ``mr``.

    Args:
        mr: Object providing ``compute_sample_sizes(n_samples)`` and an
            ``mr_list`` of sub-models, each carrying a ``lt`` attribute.
        n_samples (int, optional): Requested number of samples, forwarded
            to ``mr.compute_sample_sizes``. Default to 1000.

    Returns:
        dict: ``'given_beta_samples_list'`` holds the first element of each
        sub-model's sampling result and ``'given_gamma_samples_list'`` the
        second, both in ``mr.mr_list`` order.
    """
    sizes = mr.compute_sample_sizes(n_samples)

    beta_draws = []
    gamma_draws = []
    # one sampling call per sub-model, paired with its own sample size
    for sub_model, size in zip(mr.mr_list, sizes):
        draws = LimeTr.sampleSoln(sub_model.lt, sample_size=size)
        beta_draws.append(draws[0])
        gamma_draws.append(draws[1])

    return {
        'given_beta_samples_list': beta_draws,
        'given_gamma_samples_list': gamma_draws,
    }
def predictData(self,
                pred_x_cov_list,
                pred_z_cov_list,
                sample_size,
                pred_study_sizes=None,
                given_beta_samples=None,
                given_gamma_samples=None,
                ref_point=None,
                include_random_effect=True):
    """Create prediction samples for new covariates.

    Args:
        pred_x_cov_list (list): Fixed-effect covariate specifications
            (dictionaries).  Each entry is read for ``'cov_type'``; spline
            entries are additionally read for ``'spline_id'`` (when
            ``ref_point`` is given) and ``'mat'`` (random slope model).
        pred_z_cov_list (list): Random-effect covariate specifications,
            forwarded to ``utils.constructPredZCov``.
        sample_size (int): Number of solution samples to draw when
            ``given_beta_samples`` / ``given_gamma_samples`` are not both
            supplied.
        pred_study_sizes (optional): Number of observations in each
            prediction study; must sum to the number of rows of the
            prediction Z matrix.  If ``None`` every observation is treated
            as its own study.
        given_beta_samples (optional): Pre-computed beta samples.  Used only
            when ``given_gamma_samples`` is supplied as well.
        given_gamma_samples (optional): Pre-computed gamma samples.
        ref_point (optional): If not ``None``, every sample is divided by
            the spline evaluated at this point with that sample's spline
            coefficients.  Exactly one spline covariate is required.
        include_random_effect (bool, optional): If ``True`` add simulated
            random effects to the samples.

    Returns:
        tuple: ``(y_samples, beta_samples, gamma_samples, pred_F, pred_Z)``.

    Raises:
        Exception: If a spline covariate is required (``ref_point`` given or
            random slope model) and none, or more than one, is present.
    """
    # sample solutions
    if given_beta_samples is None or given_gamma_samples is None:
        beta_samples, gamma_samples = LimeTr.sampleSoln(
            self.lt, sample_size=sample_size)
    else:
        beta_samples = given_beta_samples
        gamma_samples = given_gamma_samples

    # Everything below must be sized by the samples actually in use: when
    # the caller supplies samples their count need not equal `sample_size`.
    num_samples = len(beta_samples)

    # create x cov (only the prediction function is needed here)
    pred_F, _, _, _, _ = utils.constructPredXCov(pred_x_cov_list, self)

    # create z cov
    pred_Z, _, pred_id_gamma_list = utils.constructPredZCov(
        pred_z_cov_list, self)

    # num of observations
    pred_num_obs = pred_Z.shape[0]

    # create observation samples, one row per beta sample
    y_samples = np.vstack([pred_F(beta) for beta in beta_samples])

    if ref_point is not None:
        x_cov_spline_id = [
            x_cov['spline_id'] for x_cov in pred_x_cov_list
            if 'spline' in x_cov['cov_type']
        ]
        if len(x_cov_spline_id) == 0:
            raise Exception("Error: no spline x cov")
        if len(x_cov_spline_id) >= 2:
            raise Exception("Error: multiple spline x covs")

        spline_id = x_cov_spline_id[0]
        spline = self.spline_list[spline_id]
        # spline value at the reference point for every beta sample
        ref_risk = spline.designMat(np.array([ref_point])).dot(
            beta_samples[:, self.id_spline_beta_list[spline_id]].T)
        y_samples /= ref_risk.reshape(num_samples, 1)

    if include_random_effect:
        if self.rr_random_slope:
            # NOTE: the draw happens before the covariate validation on
            # purpose, so the random stream is consumed exactly as before.
            u = np.random.randn(num_samples, self.k_gamma) * \
                np.sqrt(self.gamma_soln)
            # only the first random-effect column is used as the slope
            zu = u[:, 0]

            valid_x_cov_id = [
                i for i, x_cov in enumerate(pred_x_cov_list)
                if x_cov['cov_type'] == 'spline'
            ]
            if len(valid_x_cov_id) == 0:
                raise Exception(
                    "Error: no suitable x cov for random slope model.")
            if len(valid_x_cov_id) >= 2:
                raise Exception(
                    "Error: multiple x cov for random slope model.")

            mat = pred_x_cov_list[valid_x_cov_id[0]]['mat']
            # exposure is measured relative to the reference point, or to
            # the first exposure value when no reference point is given
            if ref_point is None:
                y_samples *= np.exp(np.outer(zu, mat - mat[0]))
            else:
                y_samples *= np.exp(np.outer(zu, mat - ref_point))
        else:
            if pred_study_sizes is None:
                pred_study_sizes = np.array([1] * pred_num_obs)
            else:
                assert sum(pred_study_sizes) == pred_num_obs

            pred_num_studies = len(pred_study_sizes)
            pred_Z_sub = np.split(pred_Z,
                                  np.cumsum(pred_study_sizes)[:-1])

            # gamma entries matching the columns of pred_Z; built only in
            # the branch that uses them
            pred_gamma = np.hstack(
                [self.gamma_soln[idx] for idx in pred_id_gamma_list])

            # per study: draws from N(0, Z diag(gamma) Z^T)
            u = [
                np.random.multivariate_normal(
                    np.zeros(pred_study_sizes[i]),
                    (pred_Z_sub[i] * pred_gamma).dot(pred_Z_sub[i].T),
                    num_samples)
                for i in range(pred_num_studies)
            ]
            U = np.hstack(u)

            # log-ratio models carry the random effect multiplicatively
            if any('log_ratio' in x_cov['cov_type']
                   for x_cov in self.x_cov_list):
                y_samples *= np.exp(U)
            else:
                y_samples += U

    return y_samples, beta_samples, gamma_samples, pred_F, pred_Z
class MRBRT:
    """MR-BRT Object

    Combines the data with a list of covariate models, builds the inputs
    required by LimeTr, fits the model and offers prediction / sampling
    helpers based on the fitted solution.
    """
    def __init__(self,
                 data: MRData,
                 cov_models: List[CovModel],
                 inlier_pct: float = 1.0):
        """Constructor of MRBRT.

        Args:
            data (MRData): Data for meta-regression.
            cov_models (List[CovModel]): A list of covariates models.
            inlier_pct (float, optional):
                A float number between 0 and 1 indicate the percentage of
                inliers.
        """
        self.data = data
        self.cov_models = cov_models
        self.inlier_pct = inlier_pct
        self.check_input()

        self.cov_model_names = [
            cov_model.name for cov_model in self.cov_models
        ]
        self.num_cov_models = len(self.cov_models)
        self.cov_names = []
        for cov_model in self.cov_models:
            self.cov_names.extend(cov_model.covs)
        self.num_covs = len(self.cov_names)

        # attach data to cov_model (done before the sizes below are read)
        for cov_model in self.cov_models:
            cov_model.attach_data(self.data)

        # fixed effects size and index
        self.x_vars_sizes = [
            cov_model.num_x_vars for cov_model in self.cov_models
        ]
        self.x_vars_indices = utils.sizes_to_indices(self.x_vars_sizes)
        self.num_x_vars = sum(self.x_vars_sizes)

        # random effects size and index
        self.z_vars_sizes = [
            cov_model.num_z_vars for cov_model in self.cov_models
        ]
        self.z_vars_indices = utils.sizes_to_indices(self.z_vars_sizes)
        self.num_z_vars = sum(self.z_vars_sizes)

        self.num_vars = self.num_x_vars + self.num_z_vars

        # number of constraints
        self.num_constraints = sum(
            cov_model.num_constraints for cov_model in self.cov_models)

        # number of regularizations
        self.num_regularizations = sum(
            cov_model.num_regularizations for cov_model in self.cov_models)

        # place holder for the limetr objective and its solution
        self.lt = None
        self.beta_soln = None
        self.gamma_soln = None
        self.u_soln = None
        self.w_soln = None
        self.re_soln = None

    def check_input(self):
        """Check the input type of the attributes.

        Raises:
            AssertionError: If ``data`` is not an ``MRData``, ``cov_models``
                is not a list of ``CovModel`` or ``inlier_pct`` is outside
                of ``[0, 1]``.
        """
        assert isinstance(self.data, MRData)
        assert isinstance(self.cov_models, list)
        assert all(
            isinstance(cov_model, CovModel) for cov_model in self.cov_models)
        assert (self.inlier_pct >= 0.0) and (self.inlier_pct <= 1.0)

    def get_cov_model(self, name: str) -> CovModel:
        """Choose covariate model with name.
        """
        index = self.get_cov_model_index(name)
        return self.cov_models[index]

    def get_cov_model_index(self, name: str) -> int:
        """From cov_model name get the index.

        Raises:
            AssertionError: If the name does not match exactly one
                covariate model.
        """
        matching_index = [
            index for index, cov_model_name in enumerate(self.cov_model_names)
            if cov_model_name == name
        ]
        num_matching_index = len(matching_index)
        assert num_matching_index == 1, \
            f"Number of matching index is {num_matching_index}."
        return matching_index[0]

    def create_x_fun(self, data=None):
        """Create the fixed effects function, link with limetr.

        Args:
            data (optional): Data used to build the design functions.
                If ``None`` the data attached to the model is used.

        Returns:
            tuple: ``(x_fun, x_jac_fun)``. ``x_fun`` sums the covariate
            model functions, each evaluated on its own slice of beta;
            ``x_jac_fun`` stacks the corresponding Jacobians column-wise.
        """
        data = self.data if data is None else data
        # create design functions
        design_funs = [
            cov_model.create_x_fun(data) for cov_model in self.cov_models
        ]
        funs, jac_funs = list(zip(*design_funs))

        def x_fun(beta, funs=funs):
            return sum(
                fun(beta[self.x_vars_indices[i]])
                for i, fun in enumerate(funs))

        def x_jac_fun(beta, jac_funs=jac_funs):
            return np.hstack([
                jac_fun(beta[self.x_vars_indices[i]])
                for i, jac_fun in enumerate(jac_funs)
            ])

        return x_fun, x_jac_fun

    def create_z_mat(self, data=None):
        """Create the random effects matrix, link with limetr.

        Args:
            data (optional): Data used to build the matrix. If ``None``
                the data attached to the model is used.
        """
        data = self.data if data is None else data
        mat = np.hstack(
            [cov_model.create_z_mat(data) for cov_model in self.cov_models])

        return mat

    def create_c_mat(self):
        """Create the constraints matrices.

        Returns:
            tuple: ``(c_mat, c_vec)`` where ``c_mat`` has ``num_vars``
            columns and ``c_vec`` has two rows; the blocks of all covariate
            models with constraints are stacked in model order.
        """
        c_mat = np.zeros((0, self.num_vars))
        c_vec = np.zeros((2, 0))

        for i, cov_model in enumerate(self.cov_models):
            if cov_model.num_constraints != 0:
                c_mat_sub = np.zeros(
                    (cov_model.num_constraints, self.num_vars))
                # constraints only act on this model's fixed effects
                sub_mat, c_vec_sub = cov_model.create_constraint_mat()
                c_mat_sub[:, self.x_vars_indices[i]] = sub_mat
                c_mat = np.vstack((c_mat, c_mat_sub))
                c_vec = np.hstack((c_vec, c_vec_sub))

        return c_mat, c_vec

    def create_h_mat(self):
        """Create the regularizer matrices.

        Returns:
            tuple: ``(h_mat, h_vec)`` where ``h_mat`` has ``num_vars``
            columns and ``h_vec`` has two rows; the blocks of all covariate
            models with regularizations are stacked in model order.
        """
        h_mat = np.zeros((0, self.num_vars))
        h_vec = np.zeros((2, 0))

        for i, cov_model in enumerate(self.cov_models):
            if cov_model.num_regularizations != 0:
                h_mat_sub = np.zeros(
                    (cov_model.num_regularizations, self.num_vars))
                # regularizers only act on this model's fixed effects
                sub_mat, h_vec_sub = cov_model.create_regularization_mat()
                h_mat_sub[:, self.x_vars_indices[i]] = sub_mat
                h_mat = np.vstack((h_mat, h_mat_sub))
                h_vec = np.hstack((h_vec, h_vec_sub))

        return h_mat, h_vec

    def create_uprior(self):
        """Create direct uniform prior.

        Returns:
            np.ndarray: Array with two rows and ``num_vars`` columns;
            fixed effects come first, random effects are offset by
            ``num_x_vars``.
        """
        uprior = np.array([[-np.inf] * self.num_vars,
                           [np.inf] * self.num_vars])

        for i, cov_model in enumerate(self.cov_models):
            uprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_uniform
            uprior[:, self.z_vars_indices[i] +
                   self.num_x_vars] = cov_model.prior_gamma_uniform

        return uprior

    def create_gprior(self):
        """Create direct gaussian prior.

        Returns:
            np.ndarray: Array with two rows and ``num_vars`` columns;
            fixed effects come first, random effects are offset by
            ``num_x_vars``.
        """
        gprior = np.array([[0] * self.num_vars,
                           [np.inf] * self.num_vars])

        for i, cov_model in enumerate(self.cov_models):
            gprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_gaussian
            gprior[:, self.z_vars_indices[i] +
                   self.num_x_vars] = cov_model.prior_gamma_gaussian

        return gprior

    def create_lprior(self):
        """Create direct laplace prior.

        Returns:
            np.ndarray: Array with two rows and ``num_vars`` columns;
            fixed effects come first, random effects are offset by
            ``num_x_vars``.
        """
        lprior = np.array([[0] * self.num_vars,
                           [np.inf] * self.num_vars])

        for i, cov_model in enumerate(self.cov_models):
            lprior[:, self.x_vars_indices[i]] = cov_model.prior_beta_laplace
            lprior[:, self.z_vars_indices[i] +
                   self.num_x_vars] = cov_model.prior_gamma_laplace

        return lprior

    def fit_model(self, **fit_options):
        """Fitting the model through limetr.

        The keyword arguments are forwarded unchanged to
        ``LimeTr.fitModel``.

        Args:
            x0 (np.ndarray): Initial guess for the optimization problem.
            inner_print_level (int): If non-zero printing iteration
                information of the inner problem.
            inner_max_iter (int): Maximum inner number of iterations.
            inner_tol (float): Tolerance of the inner problem.
            outer_verbose (bool): If `True` print out iteration information.
            outer_max_iter (int): Maximum outer number of iterations.
            outer_step_size (float): Step size of the outer problem.
            outer_tol (float): Tolerance of the outer problem.
            normalize_trimming_grad (bool): If `True`, normalize the
                gradient of the outer trimming problem.
        """
        # dimensions
        n = self.data.study_sizes
        k_beta = self.num_x_vars
        k_gamma = self.num_z_vars

        # data
        y = self.data.obs
        s = self.data.obs_se

        # create x fun and z mat
        x_fun, x_fun_jac = self.create_x_fun()
        z_mat = self.create_z_mat()

        # scale z_mat column-wise by its largest absolute entry
        z_scale = np.max(np.abs(z_mat), axis=0)
        # An all-zero column has scale 0; dividing by it would turn the
        # column (and the rescaled priors, via inf * 0) into NaN.  Such a
        # column needs no scaling, so use 1 instead.
        z_scale[z_scale == 0.0] = 1.0
        # out-of-place division also works for integer design matrices
        z_mat = z_mat / z_scale

        # priors
        c_mat, c_vec = self.create_c_mat()
        h_mat, h_vec = self.create_h_mat()
        c_fun, c_fun_jac = utils.mat_to_fun(c_mat)
        h_fun, h_fun_jac = utils.mat_to_fun(h_mat)

        # gamma priors are moved to the scaled problem (factor z_scale**2)
        uprior = self.create_uprior()
        uprior[:, self.num_x_vars:self.num_vars] *= z_scale**2
        gprior = self.create_gprior()
        gprior[:, self.num_x_vars:self.num_vars] *= z_scale**2
        lprior = self.create_lprior()
        lprior[:, self.num_x_vars:self.num_vars] *= z_scale**2

        # priors that do not restrict anything are not passed on
        if np.isneginf(uprior[0]).all() and np.isposinf(uprior[1]).all():
            uprior = None
        if np.isposinf(gprior[1]).all():
            gprior = None
        if np.isposinf(lprior[1]).all():
            lprior = None

        # create limetr object
        self.lt = LimeTr(n, k_beta, k_gamma,
                         y, x_fun, x_fun_jac, z_mat,
                         S=s,
                         C=c_fun, JC=c_fun_jac, c=c_vec,
                         H=h_fun, JH=h_fun_jac, h=h_vec,
                         uprior=uprior, gprior=gprior, lprior=lprior,
                         inlier_percentage=self.inlier_pct)

        self.lt.fitModel(**fit_options)

        # undo the scaling on the limetr object
        self.lt.Z *= z_scale
        for prior_name in ('gprior', 'uprior', 'lprior'):
            # skip priors that are missing or were left unset (None)
            prior = getattr(self.lt, prior_name, None)
            if prior is not None:
                prior[:, self.lt.idx_gamma] /= z_scale**2
        self.lt.gamma /= z_scale**2

        # store the solution
        self.beta_soln = self.lt.beta.copy()
        self.gamma_soln = self.lt.gamma.copy()
        self.w_soln = self.lt.w.copy()
        self.u_soln = self.lt.estimateRE()
        self.re_soln = {
            study: self.u_soln[i]
            for i, study in enumerate(self.data.studies)
        }

    def extract_re(self, study_id: np.ndarray) -> np.ndarray:
        """Extract the random effect for a given dataset.

        Args:
            study_id (np.ndarray): Study identifier of every observation.

        Returns:
            np.ndarray: One row per entry of ``study_id`` holding the stored
            random effect of that study, or zeros if the study is not in
            ``re_soln``.  An empty ``study_id`` gives an array with shape
            ``(0, num_z_vars)``.
        """
        # np.vstack cannot stack an empty list
        if len(study_id) == 0:
            return np.zeros((0, self.num_z_vars))

        no_effect = np.zeros(self.num_z_vars)
        re = np.vstack([
            self.re_soln.get(study, no_effect) for study in study_id
        ])
        return re

    def predict(self, data: MRData,
                predict_for_study: bool = False,
                sort_by_data_id: bool = False) -> np.ndarray:
        """Create new prediction with existing solution.

        Args:
            data (MRData): MRData object contains the predict data.
            predict_for_study (bool, optional):
                If `True`, use the random effects information to prediction
                for specific study. If the `study_id` in `data` do not
                contain in the fitting data, it will assume the
                corresponding random effects equal to 0.
            sort_by_data_id (bool, optional):
                If `True`, will sort the final prediction as the order of
                the original data frame that used to create the `data`.
                Default to False.

        Returns:
            np.ndarray: Predicted outcome array.
        """
        assert data.has_covs(
            self.cov_names
        ), "Prediction data do not have covariates used for fitting."
        x_fun, _ = self.create_x_fun(data=data)
        prediction = x_fun(self.beta_soln)
        if predict_for_study:
            z_mat = self.create_z_mat(data=data)
            re = self.extract_re(data.study_id)
            prediction += np.sum(z_mat * re, axis=1)

        if sort_by_data_id:
            prediction = prediction[np.argsort(data.data_id)]

        return prediction

    def sample_soln(self,
                    sample_size: int = 1,
                    sim_prior: bool = True,
                    sim_re: bool = True,
                    print_level: int = 0) -> Tuple[np.ndarray, np.ndarray]:
        """Sample solutions.

        Args:
            sample_size (int, optional): Number of samples.
            sim_prior (bool, optional): If `True`, simulate priors.
            sim_re (bool, optional): If `True`, simulate random effects.
            print_level (int, optional):
                Level detailed of optimization information printed out
                during sampling process. If 0, no information will be
                printed out.

        Return:
            Tuple[np.ndarray, np.ndarray]:
                Return beta samples and gamma samples.

        Raises:
            ValueError: If the model has not been fitted yet.
        """
        if self.lt is None:
            raise ValueError('Please fit the model first.')

        beta_soln_samples, gamma_soln_samples = \
            self.lt.sampleSoln(self.lt,
                               sample_size=sample_size,
                               sim_prior=sim_prior,
                               sim_re=sim_re,
                               print_level=print_level)

        return beta_soln_samples, gamma_soln_samples

    def create_draws(self,
                     data: MRData,
                     beta_samples: np.ndarray,
                     gamma_samples: np.ndarray,
                     random_study: bool = True,
                     sort_by_study_id: bool = False) -> np.ndarray:
        """Create draws for the given data set.

        Args:
            data (MRData): MRData object contains predict data.
            beta_samples (np.ndarray): Samples of beta, one row per sample
                and ``num_x_vars`` columns.
            gamma_samples (np.ndarray): Samples of gamma, one row per sample
                and ``num_z_vars`` columns.
            random_study (bool, optional):
                If `True` the draws will include uncertainty from study
                heterogeneity. Otherwise the stored random effects of the
                studies in `data` are added.
            sort_by_study_id (bool, optional):
                If `True`, reorder the observations of the draws by
                ``np.argsort(data.data_id)``. Note that, despite the name of
                the flag, the ordering is taken from `data.data_id`.
                Default to False.

        Returns:
            np.ndarray: Returns outcome sample matrix, transposed so that
            every column corresponds to one sample.
        """
        sample_size = beta_samples.shape[0]
        assert beta_samples.shape == (sample_size, self.num_x_vars)
        assert gamma_samples.shape == (sample_size, self.num_z_vars)

        x_fun, _ = self.create_x_fun(data=data)
        z_mat = self.create_z_mat(data=data)

        # one row per beta sample
        y_samples = np.vstack(
            [x_fun(beta_sample) for beta_sample in beta_samples])

        if random_study:
            # simulate random effects with variance gamma for every sample
            u_samples = np.random.randn(
                sample_size, self.num_z_vars) * np.sqrt(gamma_samples)
            y_samples += u_samples.dot(z_mat.T)
        else:
            re = self.extract_re(data.study_id)
            y_samples += np.sum(z_mat * re, axis=1)

        if sort_by_study_id:
            y_samples = y_samples[:, np.argsort(data.data_id)]

        return y_samples.T
def sampleGlobalWithLimeTr(self, sample_size=100, max_iter=300):
    """Draw solution samples for ``self.model`` through LimeTr.

    Args:
        sample_size (int, optional): Forwarded to ``LimeTr.sampleSoln`` as
            ``sample_size``. Default to 100.
        max_iter (int, optional): Forwarded to ``LimeTr.sampleSoln`` as
            ``max_iter``. Default to 300.

    Returns:
        tuple: The beta samples and the gamma samples, in that order.
    """
    betas, gammas = LimeTr.sampleSoln(
        self.model,
        sample_size=sample_size,
        max_iter=max_iter,
    )
    return betas, gammas