def from_formula(cls, formula, data, window, weights=None, subset=None,
                 *args, **kwargs):
    if subset is not None:
        data = data.loc[subset]
    eval_env = kwargs.pop('eval_env', None)
    if eval_env is None:
        eval_env = 2
    elif eval_env == -1:
        from patsy import EvalEnvironment
        eval_env = EvalEnvironment({})
    else:
        eval_env += 1  # we're going down the stack again
    missing = kwargs.get('missing', 'skip')
    from patsy import dmatrices, NAAction
    na_action = NAAction(on_NA='raise', NA_types=[])
    result = dmatrices(formula, data, eval_env, return_type='dataframe',
                       NA_action=na_action)
    endog, exog = result
    if (endog.ndim > 1 and endog.shape[1] > 1) or endog.ndim > 2:
        raise ValueError('endog has evaluated to an array with multiple '
                         'columns that has shape {0}. This occurs when '
                         'the variable converted to endog is non-numeric'
                         ' (e.g., bool or str).'.format(endog.shape))
    kwargs.update({'missing': missing, 'window': window})
    if weights is not None:
        kwargs['weights'] = weights
    mod = cls(endog, exog, *args, **kwargs)
    mod.formula = formula
    # since we got a dataframe, attach the original
    mod.data.frame = data
    return mod
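# The eval_env == -1 branch above passes an empty EvalEnvironment so that
# the formula can only see columns of `data`, never variables from the
# caller's scope. A minimal sketch of the difference (toy data; the
# variable name `hidden` is illustrative, not from the original):
import pandas as pd
from patsy import EvalEnvironment, PatsyError, dmatrix

df = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
hidden = 5.0  # resolvable with a normal capture, invisible to an empty env
print(dmatrix("x + hidden", df))  # works: `hidden` found in this scope
try:
    dmatrix("x + hidden", df, eval_env=EvalEnvironment({}))
except (PatsyError, NameError) as err:
    print("empty environment:", err)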
def __init__(self,
             # Observations
             formula, data,
             # NA action
             NA_action="drop",
             # Environment
             eval_env=0,
             # Number of cores
             n_estimators=500, n_jobs=1,
             **kwargs):
    """Function to fit a random forest model.

    The function fits a random forest model using a patsy formula.
    """
    # Model specifications
    self.model_type = "random_forest"
    self.formula = formula
    self.data = data

    # Patsy
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    y, x = dmatrices(formula, data, eval_env, NA_action)
    self._y_design_info = y.design_info
    self._x_design_info = x.design_info

    # Create and train the random forest
    rf = RandomForestClassifier(n_estimators=n_estimators,
                                n_jobs=n_jobs, **kwargs)
    rf.fit(x, y)
    self.rf = rf
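# A self-contained sketch of the fit/predict round trip this wrapper
# relies on: build the design matrices from the formula, fit, then push
# new data through the *saved* design info so factor coding stays
# consistent. The DataFrame columns here are made up for illustration.
import numpy as np
import pandas as pd
from patsy import build_design_matrices, dmatrices
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
df = pd.DataFrame({"y": rng.integers(0, 2, 100),
                   "a": rng.normal(size=100),
                   "b": rng.choice(["u", "v"], 100)})
y, x = dmatrices("y ~ a + b", df)
rf = RandomForestClassifier(n_estimators=50).fit(x, np.ravel(y))
new_df = pd.DataFrame({"a": [0.1, -0.3], "b": ["u", "v"]})
(x_new,) = build_design_matrices([x.design_info], new_df)
print(rf.predict(x_new))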
def rerp(self,
         # rERPRequest arguments
         event_query, start_time, stop_time, formula,
         name=None, eval_env=0, bad_event_query=None,
         all_or_nothing=False,
         # multi_rerp arguments
         artifact_query="has _ARTIFACT_TYPE",
         artifact_type_field="_ARTIFACT_TYPE",
         overlap_correction=True,
         regression_strategy="auto"):
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    request = rERPRequest(event_query, start_time, stop_time, formula,
                          name=name, eval_env=eval_env,
                          bad_event_query=bad_event_query,
                          all_or_nothing=all_or_nothing)
    rerps = self.multi_rerp([request],
                            artifact_query=artifact_query,
                            artifact_type_field=artifact_type_field,
                            overlap_correction=overlap_correction,
                            regression_strategy=regression_strategy)
    assert len(rerps) == 1
    return rerps[0]
def _fit_transform(self, data, y=None):
    eval_env = EvalEnvironment.capture(self.eval_env, reference=2)
    formula = _drop_intercept(self.formula, self.add_intercept)
    design = dmatrix(formula, data, eval_env=eval_env,
                     NA_action=self.NA_action, return_type='dataframe')
    self.design_ = design.design_info
    if self.return_type == 'dataframe':
        return design
    else:
        return np.array(design)
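# Why reference=2 above: capture() resolves names relative to the frame
# that calls it, and each wrapper layer adds one frame. _fit_transform is
# itself called from fit()/transform(), so two extra frames sit between
# the user's code and capture(). A minimal demonstration with a single
# wrapper layer (function and variable names here are illustrative):
import pandas as pd
from patsy import EvalEnvironment, dmatrix

def _build(formula, data):
    # reference=1 skips _build's own frame, so formula names are looked
    # up in the caller's scope.
    env = EvalEnvironment.capture(0, reference=1)
    return dmatrix(formula, data, eval_env=env)

def user_code():
    scale = 10.0  # lives in the caller's frame, not in _build
    return _build("I(x * scale)", pd.DataFrame({"x": [1.0, 2.0, 3.0]}))

print(user_code())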
def test_issue_11():
    # Give a sensible error message for level mismatches
    # (At some points we've failed to put an origin= on these errors)
    env = EvalEnvironment.capture()
    data = {"X": [0, 1, 2, 3], "Y": [1, 2, 3, 4]}
    formula = "C(X) + Y"
    new_data = {"X": [0, 0, 1, 2, 3, 3, 4], "Y": [1, 2, 3, 4, 5, 6, 7]}
    info = dmatrix(formula, data)
    try:
        build_design_matrices([info.design_info.builder], new_data)
    except PatsyError as e:
        assert e.origin == Origin(formula, 0, 4)
def _fit_transform(self, data, y=None):
    eval_env = EvalEnvironment.capture(self.eval_env, reference=2)
    formula = _drop_intercept(self.formula, self.add_intercept)
    design = dmatrix(formula, data, eval_env=eval_env,
                     NA_action=self.NA_action, return_type='dataframe')
    self.design_ = design.design_info
    self.feature_names_ = design.design_info.column_names
    if self.return_type == 'dataframe':
        return design
    else:
        return np.array(design)
def fit(self, data, y=None):
    """Fit the scikit-learn model using the formula.

    Parameters
    ----------
    data : dict-like (pandas dataframe)
        Input data. Contains features and possible labels.
        Column names need to match variables in formula.
    """
    eval_env = EvalEnvironment.capture(self.eval_env, reference=1)
    formula = _drop_intercept(self.formula, self.add_intercept)
    design_y, design_X = dmatrices(formula, data, eval_env=eval_env,
                                   NA_action=self.NA_action)
    self.design_y_ = design_y.design_info
    self.design_X_ = design_X.design_info
    # convert to 1d vector so we don't get a warning
    # from sklearn.
    design_y = column_or_1d(design_y)
    est = clone(self.estimator)
    self.estimator_ = est.fit(design_X, design_y)
    return self
def rerp(self, name, event_query, start_time, stop_time, formula,
         artifact_query="has _ARTIFACT_TYPE",
         artifact_type_field="_ARTIFACT_TYPE",
         overlap_correction=True,
         regression_strategy="auto",
         eval_env=0):
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    rerp_specs = [rERPSpec(name, event_query, start_time, stop_time,
                           formula)]
    return self.multi_rerp(rerp_specs,
                           artifact_query=artifact_query,
                           artifact_type_field=artifact_type_field,
                           overlap_correction=overlap_correction,
                           regression_strategy=regression_strategy,
                           eval_env=eval_env)
def transform(self, data):
    vectors = dict()
    matrices = dict()
    for term in self.termlist:
        for e in term.factors:
            state = {}
            eval_env = EvalEnvironment.capture(0)
            eval_env = eval_env.with_outer_namespace(self.feature_fns)
            # initializes `state` for stateful transforms
            passes = e.memorize_passes_needed(state, eval_env)
            mat = e.eval(state, data)
            is_var = len(mat.shape) == 1
            if is_var:
                if isinstance(mat, pd.Series):
                    mat = mat.values
                vectors[e.code] = np.reshape(mat, (mat.shape[0], 1))
            elif isinstance(mat, (np.ndarray, spmatrix)):
                matrices[e.code] = mat
            else:
                raise RuntimeError(
                    "Unsupported data format: {}".format(type(mat)))
    list_of_mats = list(vectors.values()) + list(matrices.values())
    num_sparse = len([m for m in list_of_mats if isinstance(m, spmatrix)])
    if num_sparse == 0:
        if len(list_of_mats) == 1:
            return list_of_mats[0]
        else:
            return np.concatenate(list_of_mats, axis=1)
    elif len(list_of_mats) >= 1:
        # at least one sparse matrix: fall back to a sparse hstack
        return hstack(list_of_mats, format='csr')
    else:
        raise RuntimeError("No features found")
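# The dense/sparse split above exists because np.concatenate only handles
# dense arrays, while scipy.sparse.hstack accepts a mix of dense and
# sparse blocks and promotes the result to sparse. A quick illustration:
import numpy as np
from scipy.sparse import csr_matrix, hstack

dense = np.arange(6.0).reshape(3, 2)
sparse = csr_matrix(np.eye(3))
combined = hstack([dense, sparse], format="csr")
print(combined.shape)  # (3, 5)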
def fit(self, data, y=None):
    """Fit the scikit-learn model using the formula.

    Parameters
    ----------
    data : dict-like (pandas dataframe)
        Input data. Contains features and possible labels.
        Column names need to match variables in formula.
    """
    eval_env = EvalEnvironment.capture(self.eval_env, reference=1)
    formula = _drop_intercept(self.formula, self.add_intercept)
    design_y, design_X = dmatrices(formula, data, eval_env=eval_env,
                                   NA_action=self.NA_action)
    self.design_y_ = design_y.design_info
    self.design_X_ = design_X.design_info
    self.feature_names_ = design_X.design_info.column_names
    # convert to 1d vector so we don't get a warning
    # from sklearn.
    design_y = column_or_1d(design_y)
    est = clone(self.estimator)
    self.estimator_ = est.fit(design_X, design_y)
    return self
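# Why fit() stores design_X_.design_info instead of re-running the formula
# at predict time: stateful transforms such as standardize() memorize
# their parameters when the training matrix is built, and
# build_design_matrices() reuses them. A small sketch with toy data:
import pandas as pd
from patsy import build_design_matrices, dmatrix

train = pd.DataFrame({"x": [1.0, 2.0, 3.0, 4.0]})
test = pd.DataFrame({"x": [10.0]})
design = dmatrix("standardize(x)", train)
# The saved design info applies the *training* mean and std to new data:
(design_test,) = build_design_matrices([design.design_info], test)
print(design_test)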
def multi_rerp(self, rerp_specs,
               artifact_query="has _ARTIFACT_TYPE",
               artifact_type_field="_ARTIFACT_TYPE",
               overlap_correction=True,
               # This can be "continuous", "by-epoch", or "auto". If
               # "continuous", we always build one giant regression model,
               # treating the data as continuous. If "auto", we use the
               # (much faster) approach of generating a single regression
               # model and then applying it to each latency separately --
               # but *only* if this will produce the same result as doing
               # the full regression. If "epoch", then we either use the
               # fast method, or else error out. Changing this argument
               # never affects the actual output of this function -- if it
               # does, that's a bug! In general, we can do the fast thing
               # if:
               # -- any artifacts affect either all or none of each
               #    epoch, and
               # -- either, overlap_correction=False,
               # -- or, overlap_correction=True and there are in fact no
               #    overlaps.
               regression_strategy="auto",
               eval_env=0):
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    return multi_rerp_impl(self, rerp_specs, artifact_query,
                           artifact_type_field, overlap_correction,
                           regression_strategy, eval_env)

# For artifact and bad data in general counting:
#   make an intervalset for each kind of bad data
#   intersect each with the "wanted data" spans to throw away
#     irrelevantly bad data
#   and then do a special union operation that counts which and how many
#     of the inputs are non-zero at each point, to calculate shares
# First get a representation of all okay data
#   starting with which spans we have recordings for,
#   then subtract artifacts,
#   then subtract NAs
# Then for the rest of the data:
#
# epoch_spans = []
# bad_spans = []
# for (name, event_query, start_time, stop_time, formula) in rerp_specs:
#     event_set = self.events.query(event_query)
#     # Make a design matrix for each
#     # Figure out which data points are okay:
#     # -- where there is some entry in the design matrix
#     # -- where there is no artifact
#     # -- where all the design matrices are non-NA
#     # list like (start, stop, info), sort then scan to find overlaps.
#     # info can be a reference to a row in a design matrix, or it could
#     # be a note that the given span is off-limits.
#     # If overlap_correction=False, we are going to handle each data
#     # span individually. The only question is whether any of them have
#     # partial overlaps with artifacts -- if so, then we need to do the
#     # And if regress_by_epoch is auto or
#     pass
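# A minimal sketch of the span-counting idea described in the notes
# above: given (start, stop) spans, sweep the sorted endpoints and count
# how many spans cover each segment. Names and structure here are
# illustrative, not the rerpy implementation.
def coverage_counts(spans):
    events = []
    for start, stop in spans:
        events.append((start, +1))
        events.append((stop, -1))
    events.sort()
    out, depth, prev = [], 0, None
    for pos, delta in events:
        if prev is not None and pos != prev and depth > 0:
            out.append((prev, pos, depth))  # segment covered by `depth` spans
        depth += delta
        prev = pos
    return out

print(coverage_counts([(0, 10), (5, 20), (8, 9)]))
# [(0, 5, 1), (5, 8, 2), (8, 9, 3), (9, 10, 2), (10, 20, 1)]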
def __init__(self,
             # Observations
             suitability_formula, data,
             # Spatial structure
             n_neighbors, neighbors,
             # NA action
             NA_action="drop",
             # Predictions
             data_pred=None,
             # Environment
             eval_env=0,
             # Chains
             burnin=1000, mcmc=1000, thin=1,
             # Starting values
             beta_start=0, Vrho_start=1,
             # Priors
             mubeta=0, Vbeta=1000,
             priorVrho=-1.0,  # -1="1/Gamma"
             shape=0.5, rate=0.0005, Vrho_max=10,
             # Various
             seed=1234, verbose=1, save_rho=0, save_p=0):
    """Function to fit a model_binomial_iCAR model.

    The function model_binomial_iCAR estimates the parameters of a
    Binomial model with an iCAR process for spatial autocorrelation in
    a hierarchical Bayesian framework.

    :param suitability_formula: A formula-like object that can be used
        to construct a design matrix (see ``patsy.dmatrices``).
    :param data: A dict-like object that can be used to look up
        variables referenced in ``suitability_formula``.
    :param n_neighbors: A vector of integers that indicates the number
        of neighbors (adjacent entities) of each spatial entity.
        len(n_neighbors) indicates the total number of spatial
        entities.
    :param neighbors: A vector of integers indicating the neighbors
        (adjacent entities) of each spatial entity. Must be of the form
        c(neighbors of entity 1, neighbors of entity 2, ..., neighbors
        of the last entity). The length of the neighbors vector should
        be equal to sum(n_neighbors).
    :param NA_action: What to do with rows that contain missing values
        (see ``patsy.dmatrices``).
    :param data_pred: Optional dataset for predictions.
    :param eval_env: Environment used to look up any variables
        referenced in suitability_formula that cannot be found in data
        (see ``patsy.dmatrices``).
    :param burnin: Number of iterations for the burn-in phase.
    :param mcmc: The number of Gibbs iterations for the sampler. The
        total number of Gibbs iterations is equal to burnin+mcmc.
        burnin+mcmc must be divisible by 10 and greater than or equal
        to 100 so that the progress bar can be displayed.
    :param thin: The thinning interval used in the simulation. The
        number of MCMC iterations must be divisible by this value.
    :param beta_start: Starting values for the beta parameters. This
        can either be a scalar or a p-length vector.
    :param Vrho_start: Positive scalar indicating the starting value
        for the variance of the spatial random effects.
    :param mubeta: Means of the Normal priors for the beta parameters
        of the suitability process. mubeta must be either a scalar or a
        p-length vector. If mubeta is a scalar, that value serves as
        the prior mean for all of the betas. The default value is set
        to 0 for an uninformative prior.
    :param Vbeta: Variances of the Normal priors for the beta
        parameters of the suitability process. Vbeta must be either a
        scalar or a p-length vector. If Vbeta is a scalar, that value
        serves as the prior variance for all of the betas. The default
        variance is large and set to 1000 for an uninformative flat
        prior.
    :param priorVrho: Type of prior for the variance of the spatial
        random effects. Can be set to a fixed positive scalar, to an
        inverse-gamma distribution ("1/Gamma") with parameters shape
        and rate, or to a uniform distribution ("Uniform") on the
        interval [0, Vrho_max]. Default set to "1/Gamma".
    :param shape: The shape parameter for the Gamma prior on the
        precision of the spatial random effects. The default value is
        shape=0.5 for an uninformative prior.
    :param rate: The rate (1/scale) parameter for the Gamma prior on
        the precision of the spatial random effects. The default value
        is rate=0.0005 for an uninformative prior.
    :param Vrho_max: Upper bound for the uniform prior on the variance
        of the spatial random effects. Default set to 10.
    :param seed: The seed for the random number generator. Default set
        to 1234.
    :param verbose: A switch (0, 1) which determines whether or not the
        progress of the sampler is printed to the screen. Default is 1:
        a progress bar is printed, indicating the step (in %) reached
        by the Gibbs sampler.
    :param save_rho: A switch (0, 1) which determines whether or not
        the sampled values for the rhos are saved. Default is 0: the
        posterior mean is computed and returned in the rho vector. Be
        careful, setting save_rho to 1 might require a large amount of
        memory.
    :param save_p: A switch (0, 1) which determines whether or not the
        sampled values for the predictions are saved. Default is 0: the
        posterior mean is computed and returned in the theta_pred
        vector. Be careful, setting save_p to 1 might require a large
        amount of memory.
    :return: An object of class model_binomial_iCAR.
    """
    # ====================
    # Model specifications
    # ====================
    self.model_type = "binomial_iCAR"
    self.suitability_formula = suitability_formula
    self.data = data
    self.n_neighbors = n_neighbors
    self.neighbors = neighbors
    self.NA_action = NA_action
    self.data_pred = data_pred
    self.eval_env = eval_env
    self.burnin = burnin
    self.mcmc = mcmc
    self.thin = thin
    self.beta_start = beta_start
    self.Vrho_start = Vrho_start
    self.mubeta = mubeta
    self.Vbeta = Vbeta
    self.priorVrho = priorVrho
    self.shape = shape
    self.rate = rate
    self.Vrho_max = Vrho_max
    self.seed = seed
    self.verbose = verbose
    self.save_rho = save_rho
    self.save_p = save_p

    # ======================================================
    # Form response, covariate matrices and model parameters
    # ======================================================

    # Patsy
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    y, x = dmatrices(suitability_formula, data, eval_env, NA_action)
    self._y_design_info = y.design_info
    self._x_design_info = x.design_info

    # Response
    Y = y[:, 0]
    nobs = len(Y)
    T = y[:, 1]

    # Suitability
    X_arr = x[:, :-1]  # We remove the last column (cells)
    ncol_X = X_arr.shape[1]
    X = X_arr.flatten("F")  # Flatten X by column (R/Fortran style)

    # Spatial correlation
    ncell = len(n_neighbors)
    cells = x[:, -1]  # Last column of x

    # Predictions
    if data_pred is None:
        X_pred = X
        cells_pred = cells
        npred = nobs
    else:
        (x_pred,) = build_design_matrices([self._x_design_info],
                                          data_pred)
        X_pred = x_pred[:, :-1]
        X_pred = X_pred.flatten("F")  # Flatten X_pred
        cells_pred = x_pred[:, -1]
        npred = len(cells_pred)

    # Model parameters
    npar = ncol_X
    ngibbs = mcmc + burnin
    nthin = thin
    nburn = burnin
    nsamp = mcmc // thin

    # ================================
    # Initial starting values for M-H
    # ================================
    if np.size(beta_start) == 1 and beta_start == -99:
        # Use starting coefficients from a classic logistic regression
        print("Using estimates from classic logistic regression as"
              " starting values for betas")
        mod_LR = LogisticRegression(solver="lbfgs")
        mod_LR = mod_LR.fit(X_arr, Y)
        beta_start = np.ravel(mod_LR.coef_)
    if np.size(beta_start) == 1 and beta_start != -99:
        beta_start = np.ones(npar) * beta_start
    rho_start = np.zeros(ncell)  # Set to zero

    # =====================
    # Form and check priors
    # =====================
    if np.size(mubeta) == 1:
        mubeta = np.ones(npar) * mubeta
    if np.size(Vbeta) == 1:
        Vbeta = np.ones(npar) * Vbeta

    # ===============================
    # Call C code to draw the sample
    # ===============================
    Sample = hbm.binomial_iCAR(
        # Constants and data
        ngibbs=int(ngibbs), nthin=int(nthin), nburn=int(nburn),
        nobs=int(nobs), ncell=int(ncell), np=int(npar),
        Y_obj=Y.astype(np.int32),
        T_obj=T.astype(np.int32),
        X_obj=X.astype(np.float64),  # X must be flattened.
        # Spatial correlation
        C_obj=cells.astype(np.int32),  # Must start at 0 for C.
        nNeigh_obj=n_neighbors.astype(np.int32),
        Neigh_obj=neighbors.astype(np.int32),  # Must start at 0 for C.
        # Predictions
        npred=int(npred),
        X_pred_obj=X_pred.astype(np.float64),
        C_pred_obj=cells_pred.astype(np.int32),
        # Starting values for M-H
        beta_start_obj=beta_start.astype(np.float64),
        rho_start_obj=rho_start.astype(np.float64),
        Vrho_start=float(Vrho_start),
        # Defining priors
        mubeta_obj=mubeta.astype(np.float64),
        Vbeta_obj=Vbeta.astype(np.float64),
        priorVrho=float(priorVrho),
        shape=float(shape), rate=float(rate),
        Vrho_max=float(Vrho_max),
        # Seed
        seed=int(seed),
        # Verbose
        verbose=int(verbose),
        # Save rho and p
        save_rho=int(save_rho), save_p=int(save_p))

    # Array of MCMC samples
    MCMC = np.zeros(shape=(nsamp, npar + 2))
    MCMC[:, :npar] = np.array(Sample[0]).reshape(npar, nsamp).transpose()
    MCMC[:, npar] = Sample[2]
    MCMC[:, npar + 1] = Sample[3]
    self.mcmc = MCMC

    posterior_means = np.mean(MCMC, axis=0)
    self.betas = posterior_means[:-2]
    self.Vrho = posterior_means[-2]
    self.deviance = posterior_means[-1]

    # Save rho
    if save_rho == 0:
        self.rho = np.array(Sample[1])
    if save_rho == 1:
        self.rho = np.array(Sample[1]).reshape(ncell, nsamp).transpose()

    # Save pred
    if save_p == 0:
        self.theta_pred = np.array(Sample[5])
    if save_p == 1:
        self.theta_pred = np.array(Sample[5]).reshape(npred,
                                                      nsamp).transpose()

    # theta_latent
    self.theta_latent = np.array(Sample[4])
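# The flatten("F") calls above pass X to the C sampler in column-major
# order; the "X must be flattened" comment suggests the C code indexes it
# as X[obs + nobs * col], though that indexing scheme is an assumption
# here. A quick check of the column-major convention (illustrative):
import numpy as np

x = np.array([[1.0, 10.0],
              [2.0, 20.0],
              [3.0, 30.0]])
flat = x.flatten("F")  # column-major: [1., 2., 3., 10., 20., 30.]
nobs = x.shape[0]
assert flat[1 + nobs * 1] == x[1, 1]  # element (obs=1, col=1)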
def __init__(self,
             # Observations
             suitability_formula, data,
             # Spatial structure
             n_neighbors, neighbors,
             # NA action
             NA_action="drop",
             # Predictions
             data_pred=None,
             # Environment
             eval_env=0,
             # Chains
             burnin=1000, mcmc=1000, thin=1,
             # Starting values
             beta_start=0, Vrho_start=1,
             # Priors
             mubeta=0, Vbeta=1.0e6,
             priorVrho=-1.0,  # -1="1/Gamma"
             shape=0.5, rate=0.0005, Vrho_max=10,
             # Various
             seed=1234, verbose=1, save_rho=0, save_p=0):
    """Function to fit a model_binomial_iCAR model.

    The function model_binomial_iCAR estimates the parameters of a
    Binomial model with an iCAR process for spatial autocorrelation in
    a hierarchical Bayesian framework.
    """
    # ====================
    # Model specifications
    # ====================
    self.model_type = "binomial_iCAR"
    self.suitability_formula = suitability_formula
    self.data = data
    self.n_neighbors = n_neighbors
    self.neighbors = neighbors
    self.NA_action = NA_action
    self.data_pred = data_pred
    self.eval_env = eval_env
    self.burnin = burnin
    self.mcmc = mcmc
    self.thin = thin
    self.beta_start = beta_start
    self.Vrho_start = Vrho_start
    self.mubeta = mubeta
    self.Vbeta = Vbeta
    self.priorVrho = priorVrho
    self.shape = shape
    self.rate = rate
    self.Vrho_max = Vrho_max
    self.seed = seed
    self.verbose = verbose
    self.save_rho = save_rho
    self.save_p = save_p

    # ======================================================
    # Form response, covariate matrices and model parameters
    # ======================================================

    # Patsy
    eval_env = EvalEnvironment.capture(eval_env, reference=1)
    y, x = dmatrices(suitability_formula, data, eval_env, NA_action)
    self._y_design_info = y.design_info
    self._x_design_info = x.design_info

    # Response
    Y = y[:, 0]
    nobs = len(Y)
    T = y[:, 1]

    # Suitability
    X_arr = x[:, :-1]  # We remove the last column (cells)
    ncol_X = X_arr.shape[1]
    X = X_arr.flatten("F")  # Flatten X by column (R/Fortran style)

    # Spatial correlation
    ncell = len(n_neighbors)
    cells = x[:, -1]  # Last column of x

    # Predictions
    if data_pred is None:
        X_pred = X
        cells_pred = cells
        npred = nobs
    else:
        (x_pred,) = build_design_matrices([self._x_design_info],
                                          data_pred)
        X_pred = x_pred[:, :-1]
        X_pred = X_pred.flatten("F")  # Flatten X_pred
        cells_pred = x_pred[:, -1]
        npred = len(cells_pred)

    # Model parameters
    npar = ncol_X
    ngibbs = mcmc + burnin
    nthin = thin
    nburn = burnin
    nsamp = mcmc // thin

    # ================================
    # Initial starting values for M-H
    # ================================
    if np.size(beta_start) == 1 and beta_start == -99:
        # Use starting coefficients from a classic logistic regression
        print("Using estimates from classic logistic regression as"
              " starting values for betas")
        mod_LR = LogisticRegression(solver="lbfgs")
        mod_LR = mod_LR.fit(X_arr, Y)
        beta_start = np.ravel(mod_LR.coef_)
    if np.size(beta_start) == 1 and beta_start != -99:
        beta_start = np.ones(npar) * beta_start
    rho_start = np.zeros(ncell)  # Set to zero

    # =====================
    # Form and check priors
    # =====================
    if np.size(mubeta) == 1:
        mubeta = np.ones(npar) * mubeta
    if np.size(Vbeta) == 1:
        Vbeta = np.ones(npar) * Vbeta

    # ===============================
    # Call C code to draw the sample
    # ===============================
    Sample = hsdm.binomial_iCAR(
        # Constants and data
        ngibbs=int(ngibbs), nthin=int(nthin), nburn=int(nburn),
        nobs=int(nobs), ncell=int(ncell), np=int(npar),
        Y_obj=Y.astype(np.int32),
        T_obj=T.astype(np.int32),
        X_obj=X.astype(np.float64),  # X must be flattened.
        # Spatial correlation
        C_obj=cells.astype(np.int32),  # Must start at 0 for C.
        nNeigh_obj=n_neighbors.astype(np.int32),
        Neigh_obj=neighbors.astype(np.int32),  # Must start at 0 for C.
        # Predictions
        npred=int(npred),
        X_pred_obj=X_pred.astype(np.float64),
        C_pred_obj=cells_pred.astype(np.int32),
        # Starting values for M-H
        beta_start_obj=beta_start.astype(np.float64),
        rho_start_obj=rho_start.astype(np.float64),
        Vrho_start=float(Vrho_start),
        # Defining priors
        mubeta_obj=mubeta.astype(np.float64),
        Vbeta_obj=Vbeta.astype(np.float64),
        priorVrho=float(priorVrho),
        shape=float(shape), rate=float(rate),
        Vrho_max=float(Vrho_max),
        # Seed
        seed=int(seed),
        # Verbose
        verbose=int(verbose),
        # Save rho and p
        save_rho=int(save_rho), save_p=int(save_p))

    # Array of MCMC samples
    MCMC = np.zeros(shape=(nsamp, npar + 2))
    MCMC[:, :npar] = np.array(Sample[0]).reshape(npar, nsamp).transpose()
    MCMC[:, npar] = Sample[2]
    MCMC[:, npar + 1] = Sample[3]
    self.mcmc = MCMC

    posterior_means = np.mean(MCMC, axis=0)
    self.betas = posterior_means[:-2]
    self.Vrho = posterior_means[-2]
    self.deviance = posterior_means[-1]

    # Save rho
    if save_rho == 0:
        self.rho = np.array(Sample[1])
    if save_rho == 1:
        self.rho = np.array(Sample[1]).reshape(ncell, nsamp).transpose()

    # Save pred
    if save_p == 0:
        self.theta_pred = np.array(Sample[5])
    if save_p == 1:
        self.theta_pred = np.array(Sample[5]).reshape(npred,
                                                      nsamp).transpose()

    # theta_latent
    self.theta_latent = np.array(Sample[4])
# Age_Calc:Case    1.4583    2.1848    0.667   0.506120

# Now to try this in `patsy`.
#
# Steps:
#
# 1. See how the model description is derived from the formula
# 2. Build the design matrix that the formula specifies
# 3. Use the design matrix in order to create the model in `scikit-learn`

# In[87]:

from patsy import ModelDesc, EvalEnvironment

# In[88]:

env = EvalEnvironment.capture()
predicted_lat_age_mtx = ModelDesc.from_formula('Predicted ~ Age_Calc * Case', env)

# In[89]:

predicted_lat_age_mtx

# In[90]:

from patsy import dmatrix

# In[91]:
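# A sketch of what the truncated cell above presumably does with dmatrix:
# build the right-hand-side design matrix for the same interaction
# formula. The toy DataFrame (and its `Case` labels) stands in for the
# notebook's data, which is not shown here.

import pandas as pd

toy = pd.DataFrame({"Age_Calc": [20.5, 33.0, 41.2],
                    "Case": ["control", "case", "control"]})
dmatrix("Age_Calc * Case", toy)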