def plot_model(self, gp, x):
    """Draw two GP posterior panels side by side and save them to disk.

    The left panel (black) uses ``self.xbase(strain='ura3')``; the right
    panel (green) uses ``self.xbase()`` with no arguments. Each panel shows
    the predicted mean over ``self.full_time`` with a +/- 2 standard
    deviation band.

    :param gp: model exposing ``predict(X) -> (mean, variance)``.
    :param x: object whose ``design_info`` is used to rebuild the design
        matrix for the prediction inputs.
    """
    panels = (
        (121, {'strain': 'ura3'}, 'k'),
        (122, {}, 'g'),
    )

    plt.figure(figsize=(12, 6))
    for subplot_code, xbase_kwargs, colour in panels:
        plt.subplot(subplot_code)
        design = patsy.build_design_matrices(
            [x.design_info], self.xbase(**xbase_kwargs))[0]
        # The first design column is dropped before prediction
        # (presumably the intercept -- confirm against the formula).
        mean, variance = gp.predict(design[:, 1:])
        mean = mean[:, 0]
        band = 2 * np.sqrt(variance[:, 0])
        plt.plot(self.full_time, mean, color=colour)
        plt.fill_between(self.full_time, mean - band, mean + band,
                         color=colour, alpha=.2)

    plt.savefig("figures/%s/model/%s.png" % (self.label, self.current_strain),
                bbox_inches="tight")
    plt.close()
def transform(self, X):
    """Build the X and/or y design matrices for new data.

    ``X`` is copied, optionally passed through ``self.handle_na.transform``,
    and then evaluated against the stored design infos ``self.X`` /
    ``self.y``.

    :return: ``(y_transform, X_transform)`` when both ``return_X`` and
        ``return_y`` are set, otherwise the single requested matrix.
    :raises ValueError: when neither ``return_X`` nor ``return_y`` is set.
    """
    X = X.copy()
    if self.handle_na is not None:
        X = self.handle_na.transform(X)

    def _design(info):
        # Evaluate one stored design info against the (cleaned) data.
        return patsy.build_design_matrices(
            [info], X, return_type=self.return_type)[0]

    if self.return_X & self.return_y:
        X_transform = _design(self.X)
        y_transform = _design(self.y)
        return (y_transform, X_transform)
    if self.return_X:
        return _design(self.X)
    if self.return_y:
        return _design(self.y)
    raise ValueError(self, "Need to choose an return X or return Y")
def predict(self, new_data=None):
    """Function returning the predictions of a model_binomial_iCAR model.

    Function to return the predictions of a model_binomial_iCAR model
    for a new data-set.

    :param new_data: A dict-like object which will be used to look \
    up data (including explicative variables and cell values). When
    ``None``, ``self.data`` is used instead.

    :return: prediction (a probability).

    """
    # Data
    data = self.data if new_data is None else new_data
    (new_x, ) = build_design_matrices([self._x_design_info], data)
    # The last design column holds the cell index; the rest are covariates.
    X = new_x[:, :-1]
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent.
    cell = new_x[:, -1].astype(int)

    # Rho: use the stored vector directly, or average over the first axis
    # when several rho vectors are stored.
    if self.rho.ndim == 1:
        rho = self.rho[cell]
    else:
        rho = np.mean(self.rho, axis=0)[cell]

    return invlogit(np.dot(X, self.betas) + rho)
def _compute_delta_side(m, design_info, x_base, x_side, xslice, xchange,
                        function, derivative, derivative_ind):
    """Return ``(mu, var)`` of model ``m`` for one side of the comparison."""
    x_temp = dict_copy(x_base, {})
    x_temp = dict_copy(x_side, x_temp)
    predx = patsy.build_design_matrices([design_info], x_temp)[0]
    predx = xslice(predx)
    if xchange:
        predx = xchange(predx)

    if derivative:
        mu, _ = m.predictive_gradients(predx)
        mu = mu[:, derivative_ind, 0]
        if function:
            _, var = m._raw_predict(predx)
        else:
            _, var = m.predict(predx)
        var = var[:, 0]
        # Scale the predictive variance by the inverse lengthscale of the
        # differentiated input dimension.
        var = 1. / m.kern.lengthscale[derivative_ind] * var
        if var.ndim > 1:
            var = np.diag(var)
    else:
        if function:
            mu, var = m._raw_predict(predx)
        else:
            mu, var = m.predict(predx)
        mu = mu[:, 0]
        var = var[:, 0]
    return mu, var


def compute_delta(m,x,x_base={},x_1={},x_2={},function=True,derivative=False,derivative_ind=None,xslice=lambda x: x[:,1:],xchange_1=None,xchange_2=None):
    """Difference between model predictions at two input settings.

    Each side is built by layering ``x_1`` (resp. ``x_2``) over ``x_base``
    with ``dict_copy``, converting to a design matrix with
    ``x.design_info``, slicing with ``xslice`` and optionally transforming
    with ``xchange_1`` (resp. ``xchange_2``).

    ``xchange_1`` was previously accepted but never applied; it is now
    applied symmetrically with ``xchange_2``. With the default ``None``
    the behaviour is unchanged.

    :param m: model exposing ``predict``, ``_raw_predict``,
        ``predictive_gradients`` and ``kern.lengthscale``.
    :param x: object carrying the ``design_info`` of the training design.
    :param function: use ``m._raw_predict`` when true, else ``m.predict``.
    :param derivative: compare gradients along ``derivative_ind`` instead
        of function values.
    :return: ``(mu_1 - mu_2, (sqrt(var_1) + sqrt(var_2)) ** 2)``.
    """
    mu_1, var_1 = _compute_delta_side(
        m, x.design_info, x_base, x_1, xslice, xchange_1,
        function, derivative, derivative_ind)
    mu_2, var_2 = _compute_delta_side(
        m, x.design_info, x_base, x_2, xslice, xchange_2,
        function, derivative, derivative_ind)
    return mu_1 - mu_2, (np.sqrt(var_1) + np.sqrt(var_2)) ** 2
def test_issue_11():
    """Level mismatches must raise a PatsyError that carries an origin."""
    # Give a sensible error message for level mismatches
    # (At some points we've failed to put an origin= on these errors)
    data = {"X": [0, 1, 2, 3], "Y": [1, 2, 3, 4]}
    formula = "C(X) + Y"
    # X contains the level 4, which is absent from the training data.
    new_data = {"X": [0, 0, 1, 2, 3, 3, 4], "Y": [1, 2, 3, 4, 5, 6, 7]}
    info = dmatrix(formula, data)
    try:
        build_design_matrices([info.design_info.builder], new_data)
    except PatsyError as e:
        # The origin should point at the "C(X)" term of the formula.
        assert e.origin == Origin(formula, 0, 4)
    else:
        # Without this the test passed silently when no error was raised.
        assert False, "expected PatsyError for unseen level in X"
def test_issue_11():
    """Check that an unseen categorical level raises PatsyError with origin."""
    # Give a sensible error message for level mismatches
    # (At some points we've failed to put an origin= on these errors)
    formula = "C(X) + Y"
    data = {"X": [0, 1, 2, 3], "Y": [1, 2, 3, 4]}
    # The final X value (4) is a level the training data never contained.
    new_data = {"X": [0, 0, 1, 2, 3, 3, 4], "Y": [1, 2, 3, 4, 5, 6, 7]}
    info = dmatrix(formula, data)
    try:
        build_design_matrices([info.design_info.builder], new_data)
    except PatsyError as e:
        # Characters 0..4 of the formula are the "C(X)" term.
        assert e.origin == Origin(formula, 0, 4)
    else:
        # Fail loudly instead of passing when nothing was raised.
        assert False, "expected PatsyError for unseen level in X"
def GAM_design_test(X_train, X_test, dfs):
    """Build GAM design matrices for train and test data, column by column.

    For column ``j``: ``dfs[j] <= 0`` drops the column, ``dfs[j] == 1``
    keeps it as a single linear column, ``dfs[j] == 2`` uses a degree-1
    B-spline basis with 2 df, and anything larger uses a ``cr`` basis with
    ``dfs[j]`` df. Spline bounds span both the train and test values, and
    the test basis is rebuilt from the train design info.

    :param X_train: 2-D array-like of training predictors.
    :param X_test: 2-D array-like of test predictors (same columns).
    :param dfs: per-column degrees of freedom, indexable by column number.
    :return: ``(X_train_gam, X_test_gam)`` horizontally stacked bases.
    """
    # Coerce both inputs so lists / DataFrames / ndarray subclasses can be
    # indexed as X[:, j]; previously only X_test was converted.
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)

    p = X_train.shape[1]
    train_splines = []
    test_splines = []

    for j in range(p):
        if not dfs[j] > 0:
            continue
        if dfs[j] == 1:
            train_splines.append(X_train[:, j].reshape((-1, 1)))
            test_splines.append(X_test[:, j].reshape((-1, 1)))
            continue

        # NOTE: the patsy formulas below look up the local names ``a``,
        # ``b``, ``dfs`` and ``j`` in this frame -- do not rename them or
        # move the dmatrix calls into a helper/comprehension.
        a = min(np.min(X_train[:, j]), np.min(X_test[:, j]))  # lower bound
        b = max(np.max(X_train[:, j]), np.max(X_test[:, j]))  # upper bound
        if dfs[j] == 2:
            X = dmatrix('bs(x, degree=1, df=2, lower_bound=a, upper_bound=b) - 1',
                        {'x': X_train[:, j]}, return_type='matrix')
        else:
            X = dmatrix('cr(x, df=dfs[j], lower_bound=a, upper_bound=b) - 1',
                        {'x': X_train[:, j]}, return_type='matrix')
        train_splines.append(X)
        test_splines.append(
            build_design_matrices([X.design_info], {'x': X_test[:, j]})[0])

    X_train_gam = np.hstack(train_splines)
    X_test_gam = np.hstack(test_splines)
    return X_train_gam, X_test_gam
def predict(self, data, linear=False):
    """Return per-row class probabilities as a DataFrame.

    The design matrix is rebuilt from ``self._X_design_info``, the betas
    are applied with ``linear_transform``, a zero column is prepended, and
    the exponentiated values are normalised so each row sums to one.

    Note: ``linear`` is accepted but not used, and columns referenced
    inside a ``power(...)`` term are cast to float64 in ``data`` itself.

    :return: ``[]`` for empty ``data``, otherwise a ``pandas.DataFrame``.
    """
    if len(data) == 0:
        return []

    # identifies exponential variables from the design matrix (via the 'power' flag) and converts to float64
    # this prevents mis-specification of probabilities in cases of variable overflow
    # (if the original var was compressed to a smaller bit integer/float)
    power_vars = {
        re.search(r'(?<=power\().+?(?=,)', name).group()
        for name in self._X_design_info.column_names
        if 'power' in name
    }
    for var in power_vars:
        data[var] = data[var].astype('float64')

    (X, ) = patsy.build_design_matrices([self._X_design_info], data)

    # apply betas to data
    scores = linear_transform(np.asarray(X), np.asarray(self._betas))
    baseline = np.zeros((len(data), 1))
    exp_scores = np.exp(np.concatenate([baseline, scores], axis=1))
    row_totals = np.sum(exp_scores, axis=1, keepdims=True)
    return pd.DataFrame(exp_scores / row_totals)
def make_design_matrix_no_speed(lagged_is_replay, lagged_speed, design_matrix):
    """Build a prediction design matrix with a constant ``lagged_is_replay``.

    ``lagged_speed`` is only used for its shape: ``lagged_is_replay`` is
    broadcast to an array shaped like it. NA handling is disabled by
    passing an ``NAAction`` with no NA types.
    """
    replay_column = lagged_is_replay * np.ones_like(lagged_speed)
    predict_data = {'lagged_is_replay': replay_column}
    keep_all_rows = NAAction(NA_types=[])
    (result,) = build_design_matrices(
        [design_matrix.design_info], predict_data, NA_action=keep_all_rows)
    return result
def infer_discrete_state_transition_from_training_data(is_non_local,
                                                       penalty=1e-5):
    """Estimate a 2x2 state transition matrix from a binary state sequence.

    A binomial model ``is_non_local ~ 1 + lagged_is_non_local`` is fitted
    with ``penalized_IRLS`` (the intercept is left unpenalised), then
    evaluated at lagged state 0 and 1.

    :param is_non_local: array of state indicators; cast to float64.
    :param penalty: scalar penalty applied to every non-intercept term.
    :return: array ``[[1 - p0, p0], [1 - p1, p1]]`` where ``p_k`` is the
        predicted probability given lagged state ``k`` (NaN replaced by 0).
    """
    MODEL_FORMULA = 'is_non_local ~ 1 + lagged_is_non_local'

    current = is_non_local.astype(np.float64)
    lagged = lagmat(is_non_local, maxlag=1).astype(np.float64).squeeze()
    data = pd.DataFrame({
        'is_non_local': current,
        'lagged_is_non_local': lagged,
    }).dropna()

    response, design_matrix = dmatrices(MODEL_FORMULA, data)

    n_terms = design_matrix.shape[1]
    penalty = np.ones((n_terms, )) * penalty
    penalty[0] = 0.0  # do not penalise the intercept
    fit = penalized_IRLS(design_matrix, response,
                         family=families.Binomial(), penalty=penalty)

    predict_data = {'lagged_is_non_local': np.asarray([0, 1])}
    predict_design_matrix = build_design_matrices(
        [design_matrix.design_info], predict_data,
        NA_action=NAAction(NA_types=[]))[0]

    linear_predictor = predict_design_matrix @ np.squeeze(fit.coefficients)
    non_local_probability = families.Binomial().link.inverse(linear_predictor)
    non_local_probability[np.isnan(non_local_probability)] = 0.0

    rows = [[1 - prob, prob]
            for prob in (non_local_probability[0], non_local_probability[1])]
    return np.asarray(rows)
def predict(self, X):
    """Predict from the fitted model(s) for the columns of ``X``.

    Column ``k`` (0-based) of ``X`` is exposed to the stored design info
    as variable ``x{k+1}``. When ``self.quantiles`` is a scalar a single
    model is used; otherwise one prediction per model in ``self.model`` is
    stacked and transposed.
    """
    data = {f'x{number}': column
            for number, column in enumerate(X.T, start=1)}
    (design_matrix,) = build_design_matrices([self.design_info], data)
    model_input = {'x': design_matrix}

    if np.isscalar(self.quantiles):
        return self.model.predict(model_input)

    per_model = [m.predict({'x': design_matrix}) for m in self.model]
    return np.array(per_model).T
def gp_predict(gp, x, design_info):
    """Run ``gp.predict`` for strain 'ura3' with paraquat fixed at 0.

    NOTE(review): the prediction is computed but not returned, the
    ``design_info`` argument is unused (``x.design_info`` is used instead),
    and ``time`` is read from the enclosing scope and is presumably a
    length-50 sequence to match the hard-coded repeats -- confirm.
    """
    n_points = 50
    conditions = {
        'time': time,
        'Strain': ['ura3'] * n_points,
        'paraquat': [0] * n_points,
    }
    (predx,) = patsy.build_design_matrices([x.design_info], conditions)
    # Drop the first design column before predicting.
    mu, var = gp.predict(predx[:, 1:])
def GAM_splines(X_train, X_test, nonlinear, dfs):
    """Build GAM design matrices: spline bases followed by linear columns.

    Each predictor named in ``nonlinear`` is expanded with a ``cr`` basis
    using ``dfs[i]`` degrees of freedom, with bounds spanning both train
    and test values. The test basis is rebuilt from the train design info.
    All remaining columns of ``X_train`` are appended unchanged.

    :param X_train: DataFrame of training predictors.
    :param X_test: DataFrame of test predictors (same columns).
    :param nonlinear: names of the columns to expand with splines.
    :param dfs: degrees of freedom, aligned with ``nonlinear``.
    :return: ``(X_train_gam, X_test_gam)`` as stacked arrays.
    """
    linear = [x for x in list(X_train.columns)
              if x not in nonlinear]  # linear predictors

    train_splines = []
    test_splines = []

    for i, predictor in enumerate(nonlinear):
        # NOTE: the patsy formula below looks up the local names ``a``,
        # ``b``, ``dfs`` and ``i`` in this frame -- do not rename them.
        a = min(X_train[predictor].min(), X_test[predictor].min())  # lower bound
        b = max(X_train[predictor].max(), X_test[predictor].max())  # upper bound

        X = dmatrix('cr(x, df=dfs[i], lower_bound=a, upper_bound=b) - 1',
                    {'x': X_train[predictor]}, return_type='dataframe')

        # DataFrame.as_matrix() was removed in pandas 1.0; np.asarray gives
        # the same 2-D array on every pandas version.
        train_splines.append(np.asarray(X))
        test_splines.append(
            build_design_matrices([X.design_info],
                                  {'x': X_test[predictor]})[0])

    # merges the splines for different predictors into one matrix
    X_train_gam = np.hstack(train_splines)
    # merges the splines with the linear predictors
    X_train_gam = np.hstack((X_train_gam, X_train[linear]))

    X_test_gam = np.hstack(test_splines)
    X_test_gam = np.hstack((X_test_gam, X_test[linear]))

    return X_train_gam, X_test_gam
def get_bspline_design_matrix(timepoints, n_knots, df, degree):
    """
    Use ``patsy`` to evaluate a B-spline basis at the given timepoints.

    The basis is first defined on ``n_knots`` equally spaced points between
    ``min(timepoints)`` and ``max(timepoints)``; the resulting design info
    is then re-evaluated at ``timepoints``.

    Parameters
    -------------
    timepoints :
        The times of the observed gene expressions.
    n_knots : `int`
        Number of equally spaced points used to define the basis.
    df : `int`
        The degrees of freedom of the spline basis.
    degree : `int`
        The degree of the spline basis.

    Returns
    -------
    The design matrix of the basis evaluated at ``timepoints``.
    """
    lower, upper = np.min(timepoints), np.max(timepoints)
    grid = {"x": np.linspace(lower, upper, n_knots)}

    design_string = (
        "bs(x, df={}, degree={}, include_intercept=True) - 1".format(df, degree)
    )
    reference_design = patsy.dmatrix(design_string, grid)

    (x_bs,) = patsy.build_design_matrices(
        [reference_design.design_info], {"x": timepoints})
    return x_bs
def create_predict_design_matrix(position, design_matrix):
    """Rebuild a design matrix at ``position``, propagating NaN rows.

    NaN positions are temporarily replaced by 0 so the design matrix can be
    built, then the corresponding rows of the result are set to NaN.

    :param position: array of positions; it is no longer modified in place.
    :param design_matrix: fitted design matrix supplying ``design_info``.
    :return: design matrix with NaN rows wherever ``position`` was NaN.
    """
    is_nan = np.isnan(position)
    # Work on a copy: the previous in-place assignment overwrote the
    # caller's NaNs with zeros as a side effect.
    position = position.copy()
    position[is_nan] = 0

    predictors = {'position': position}
    design_matrix = build_design_matrices(
        [design_matrix.design_info], predictors)[0]
    design_matrix[is_nan] = np.nan
    return design_matrix
def predict(self, data):
    """Apply the fitted betas to the design matrix built from ``data``.

    :return: ``[]`` for empty ``data``, otherwise the result of
        ``linear_transform`` on the design matrix and ``self._betas``.
    """
    if len(data) == 0:
        return []
    design = patsy.build_design_matrices([self._X_design_info], data)
    (X, ) = design
    features = numpy.asarray(X)
    return linear_transform(features, self._betas)
def predict(self, demand_fixture_data, params=None):
    '''
    Predicts across index using fitted model params

    Parameters
    ----------
    demand_fixture_data : pandas.DataFrame
        Formatted input data as returned by
        :code:`ModelDataFormatter.create_demand_fixture()`. Only the
        :code:`tempF` column is used.
    params : dict, default None
        Parameters found during model fit. If None, :code:`self.params`
        is used. Keys read here:

        - :code:`X_design_info`: patsy design info used to rebuild the
          design matrix.
        - :code:`coefficients`: ElasticNetCV coefficients.
        - :code:`intercept`: ElasticNetCV intercept.

    Returns
    -------
    output : pandas.Series
        Predicted values indexed like the resampled input; rows missing
        from the design matrix are reintroduced as NaN by reindexing.
    '''
    if params is None:
        params = self.params

    # needs only tempF
    model_data = demand_fixture_data.resample(self.model_freq).agg(
        {'tempF': np.mean})

    # Degree-day features, floored at zero.
    cooling_excess = model_data.tempF - self.cooling_base_temp
    model_data.loc[:, 'CDD'] = np.maximum(cooling_excess, 0.)
    heating_deficit = self.heating_base_temp - model_data.tempF
    model_data.loc[:, 'HDD'] = np.maximum(heating_deficit, 0.)

    model_data.loc[:, 'holiday_name'] = self._holidays_indexed(
        model_data.index)

    (X,) = patsy.build_design_matrices(
        [params["X_design_info"]], model_data, return_type='dataframe')

    # Rehydrate an estimator from the stored parameters instead of refitting.
    estimator = linear_model.ElasticNetCV(l1_ratio=self.l1_ratio,
                                          fit_intercept=False)
    estimator.coef_ = params["coefficients"]
    estimator.intercept_ = params["intercept"]

    predicted = pd.Series(estimator.predict(X), index=X.index)

    # add NaNs back in
    return predicted.reindex(model_data.index)
def basis_column_new_(self, x, design_info):
    """
    Evaluate the basis for a column vector and drop its first column.

    @param x: column vector, exposed to the design as variable ``x``
    @param design_info: design info describing the basis
    @return: plain ndarray holding all basis columns except the first
    """
    (basis,) = build_design_matrices([design_info], {"x": x})
    as_array = np.array(basis)
    return as_array[:, 1:]
def create_predict_design_matrix(position, design_matrix):
    """Rebuild a design matrix at ``position``, propagating NaN rows.

    Any row of ``position`` containing a NaN is temporarily zeroed so the
    design matrix can be built; the matching rows of the result are then
    set to NaN.

    :param position: positions, passed through ``atleast_2d``; the caller's
        array is no longer modified in place.
    :param design_matrix: fitted design matrix supplying ``design_info``.
    :return: design matrix with NaN rows wherever ``position`` had a NaN.
    """
    # Copy after atleast_2d: it may hand back the caller's own array (or a
    # view of it), and the assignment below would then overwrite their NaNs.
    position = atleast_2d(position).copy()
    is_nan = np.any(np.isnan(position), axis=1)
    position[is_nan] = 0

    predictors = {'position': position}
    design_matrix = build_design_matrices(
        [design_matrix.design_info], predictors)[0]
    design_matrix[is_nan] = np.nan
    return design_matrix
def _predictors_by_trajectory_direction(trajectory_direction,
                                        place_bin_centers, design_matrix):
    '''Design matrix over the place bins for one trajectory direction.

    ``trajectory_direction`` is repeated once per place bin so both
    predictor columns have the same length.
    '''
    n_bins = len(place_bin_centers)
    predictors = dict(
        linear_distance=place_bin_centers,
        trajectory_direction=[trajectory_direction] * n_bins,
    )
    (result,) = build_design_matrices(
        [design_matrix.design_info], predictors)
    return result
def loglik(self, new_data):
    """Pointwise Gaussian log-likelihood of ``new_data`` under the fit.

    The variance estimate is ``self.rss / self.nobs``.

    :param new_data: dict-like providing the variables of both formula
        sides.
    :return: array of log-density values, one per observation.
    """
    (new_y, new_x) = build_design_matrices(
        [self._y_design_info, self._x_design_info], new_data)
    new_pred = np.dot(new_x, self.betas)
    sigma2 = self.rss / self.nobs
    # It'd be more elegant to use scipy.stats.norm.logpdf here, but adding
    # a dependency on scipy makes the docs build more complicated:
    Z = -0.5 * np.log(2 * np.pi * sigma2)
    # The residual is observed minus *predicted*; the previous code
    # subtracted the raw design matrix (new_x) and never used new_pred.
    return Z + -0.5 * (new_y - new_pred)**2 / sigma2
def predictors_by_experimental_condition(experimental_condition,
                                         place_bin_centers, design_matrix):
    '''Design matrix over the place bins for one experimental condition.

    ``experimental_condition`` is repeated once per place bin so both
    predictor columns have the same length.
    '''
    repeated_condition = [experimental_condition] * len(place_bin_centers)
    predictors = dict(
        position=place_bin_centers,
        experimental_condition=repeated_condition,
    )
    matrices = build_design_matrices([design_matrix.design_info], predictors)
    return matrices[0]
def loglik(self, new_data):
    """Return the per-observation Gaussian log-likelihood of ``new_data``.

    Uses the fitted ``self.betas`` for the mean and ``self.rss / self.nobs``
    as the variance.

    :param new_data: dict-like providing the variables of both formula
        sides.
    :return: array of log-density values, one per observation.
    """
    design_infos = [self._y_design_info, self._x_design_info]
    (new_y, new_x) = build_design_matrices(design_infos, new_data)
    new_pred = np.dot(new_x, self.betas)
    sigma2 = self.rss / self.nobs
    # It'd be more elegant to use scipy.stats.norm.logpdf here, but adding
    # a dependency on scipy makes the docs build more complicated:
    Z = -0.5 * np.log(2 * np.pi * sigma2)
    # Bug fix: the residual must be new_y - new_pred, not new_y - new_x
    # (new_pred was computed but never used).
    residual = new_y - new_pred
    return Z + -0.5 * residual ** 2 / sigma2
def transform(self, X):
    """
    Apply the fitted formula to the matrix/dataframe X.

    Requires ``design_info_`` to be set (checked with ``check_is_fitted``).
    Returns the resulting design array. Any ``PatsyError`` is re-raised as
    a ``RuntimeError`` chained to the original error.
    """
    check_is_fitted(self, 'design_info_')
    try:
        matrices = build_design_matrices([self.design_info_], X)
        return matrices[0]
    except PatsyError as err:
        raise RuntimeError from err
def get_levels(self, level_dict):
    """Return the design row for a single combination of variable values.

    Every value in ``level_dict`` is wrapped in a one-element list so the
    dict forms a one-row DataFrame, which is then evaluated against
    ``self.X.design_info``. Singleton dimensions are squeezed away.
    """
    # Make sure all dictionary items are lists
    one_row = pd.DataFrame(
        {name: [value] for name, value in level_dict.items()})
    matrices = patsy.build_design_matrices(
        [self.X.design_info], data=one_row)
    return np.asarray(matrices).squeeze()
def make_spline_predict_matrix(design_info, position):
    """Evaluate a spline design at ``position``, propagating NaN rows.

    Column ``k`` of ``position`` is exposed to the design as ``x{k}``.
    Rows containing a NaN are temporarily zeroed so the design matrix can
    be built; the matching rows of the result are then set to NaN.

    :param design_info: patsy design info describing the spline basis.
    :param position: positions, passed through ``atleast_2d``; the caller's
        array is no longer modified in place.
    :return: design matrix with NaN rows wherever ``position`` had a NaN.
    """
    # Copy after atleast_2d: it may hand back the caller's own array (or a
    # view of it), and the assignment below would then overwrite their NaNs.
    position = atleast_2d(position).copy()
    is_nan = np.any(np.isnan(position), axis=1)
    position[is_nan] = 0.0

    predict_data = {
        f'x{ind}': position[:, ind] for ind in range(position.shape[1])
    }

    design_matrix = build_design_matrices([design_info], predict_data)[0]
    design_matrix[is_nan] = np.nan
    return design_matrix
def predict(self, new_data=None, **kwargs):
    """Function returning the predictions of a model_random_forest model.

    Function to return the predictions of a model_random_forest model
    for a new data-set.

    :param new_data: A dict-like object which will be used to look \
    up data (including explicative variables and cell values). When
    ``None``, ``self.data`` is used instead.
    :param kwargs: forwarded to ``self.rf.predict_proba``.

    :return: prediction (a probability), taken from column 1 of the
        ``predict_proba`` output.

    """
    # Data
    source = self.data if new_data is None else new_data
    (new_x, ) = build_design_matrices([self._x_design_info], source)

    # Predictions
    probabilities = self.rf.predict_proba(new_x, **kwargs)
    return np.array(probabilities[:, 1])
def predict(self, data, linear=False):
    """Predict from the fitted betas, optionally on the linear scale.

    :param data: input data evaluated against ``self._X_design_info``.
    :param linear: when true, return the linear predictor; otherwise pass
        it through ``self._link.inverse`` (called with a fresh
        ``self._link()`` instance as its first argument).
    :return: ``[]`` for empty ``data``, otherwise the predictions.
    """
    if len(data) == 0:
        return []

    (X, ) = patsy.build_design_matrices([self._X_design_info], data)
    linear_predictor = linear_transform(numpy.asarray(X), self._betas)

    if linear:
        return linear_predictor
    return self._link.inverse(self._link(), linear_predictor)
def transform(self, X):
    """
    Apply the fitted formula to the matrix/dataframe X.

    Requires ``design_info_`` to be set (checked with ``check_is_fitted``).
    Any ``PatsyError`` is re-raised as a ``RuntimeError`` chained to the
    original error.

    Returns
        - A patsy.DesignMatrix, if return_type="matrix" (the default)
        - A pandas.DataFrame, if return_type="dataframe"
    """
    check_is_fitted(self, "design_info_")
    try:
        matrices = build_design_matrices(
            [self.design_info_], X, return_type=self.return_type)
        return matrices[0]
    except PatsyError as err:
        raise RuntimeError from err
def plot_model(self, gp, x):
    """Plot GP predictions for two input settings and save the figure.

    Panel 121 (black) predicts at ``self.xbase(strain='ura3')``; panel 122
    (green) predicts at ``self.xbase()``. Both show the mean over
    ``self.full_time`` with a +/- 2 standard deviation band. The figure is
    written under ``figures/<label>/model/`` and then closed.

    :param gp: model exposing ``predict(X) -> (mean, variance)``.
    :param x: object whose ``design_info`` rebuilds the design matrix.
    """

    def _draw_panel(code, base, shade):
        # One subplot: rebuild the design, predict, draw mean and band.
        plt.subplot(code)
        predx = patsy.build_design_matrices([x.design_info], base)[0]
        # The first design column is not passed to the GP.
        mu, var = gp.predict(predx[:, 1:])
        mu, var = mu[:, 0], var[:, 0]
        spread = 2 * np.sqrt(var)
        plt.plot(self.full_time, mu, color=shade)
        plt.fill_between(self.full_time, mu - spread, mu + spread,
                         color=shade, alpha=.2)

    plt.figure(figsize=(12, 6))
    _draw_panel(121, self.xbase(strain='ura3'), 'k')
    _draw_panel(122, self.xbase(), 'g')

    target = "figures/%s/model/%s.png" % (self.label, self.current_strain)
    plt.savefig(target, bbox_inches="tight")
    plt.close()
def predict(self, newdata, burn=None, seed=None):
    """
    Return posterior draws of the linear predictor for ``newdata``.

    :param newdata: either a numpy array of predictors (a 1-D array is
        treated as a single row) whose column count equals ``self.xdim``,
        or any data object from which the design matrix is rebuilt.
    :param burn: number of initial draws to discard; when ``None`` it is
        obtained from ``R.suggest_burn(self.log_likelihood)``.
    :param seed: when given, seeds ``boom.GlobalRng.rng``.
    :return: the retained coefficient draws multiplied by the transposed
        predictor matrix (draws by observations).
    """
    if burn is None:
        burn = R.suggest_burn(self.log_likelihood)

    if seed is not None:
        boom.GlobalRng.rng.seed(int(seed))

    is_array = isinstance(newdata, np.ndarray)
    if is_array and newdata.ndim == 1:
        newdata = newdata.reshape(1, -1)

    if is_array and newdata.shape[1] == self.xdim:
        predictors = newdata
    else:
        (predictors,) = patsy.build_design_matrices(
            [self._x_design_info], data=newdata)

    retained_draws = self._coefficient_draws[burn:, :]
    return retained_draws @ predictors.T
def find_knn(self, df, n=5):
    """Look up the ``n`` nearest training rows for every row of ``df``.

    A ``NearestNeighbors`` model is fitted on ``self.X_train``; ``df`` is
    converted to a design DataFrame with ``self._X.design_info`` and
    queried against it.

    :return: dict keyed by the design frame's index. Each value is a dict
        with ``"train"`` (``self.X_train.loc[neighbours]``) and
        ``"raw_df"`` (``self.df.loc[neighbours]``).
    """
    knn = NearestNeighbors(n_neighbors=n).fit(self.X_train)
    (X,) = patsy.build_design_matrices(
        [self._X.design_info], df, return_type='dataframe')

    _, indices = knn.kneighbors(X)
    # One array of neighbour indices per query row.
    X['nn'] = list(indices)

    return {
        index: {
            "train": self.X_train.loc[row.nn],
            "raw_df": self.df.loc[row.nn],
        }
        for index, row in X.iterrows()
    }
def predict_binomial_iCAR(model, new_data, rhos):
    """Function to return the predictions of a model_binomial_iCAR model.

    Function to return the predictions of a model_binomial_iCAR model
    for a new data-set. In this function, rho values for spatial cells
    are directly provided and not obtained from the model.

    :param model: model_binomial_iCAR model to predict from.
    :param new_data: pandas DataFrame including explicative variables.
    :param rhos: spatial random effects for each observation (row) in
        new_data.

    :return: prediction (a probability).

    """
    design = build_design_matrices([model._x_design_info], new_data)
    (new_x, ) = design
    # The last design column is excluded from the linear predictor.
    covariates = new_x[:, :-1]
    linear_predictor = np.dot(covariates, model.betas) + rhos
    return invlogit(linear_predictor)
def likelihood(self, data=None, log=False):
    """
    Calculate the pointwise Gaussian (log)likelihood of ``data``.

    Parameters
    ==========
    data: dict-like with numpy array values
        points at which to evaluate the likelihood.
        NOTE(review): ``data`` is passed to ``build_design_matrices``
        as-is; no fallback for ``None`` is implemented here -- confirm
        whether callers rely on one.
    log: bool
        whether to return the log likelihood or the likelihood itself

    Returns
    =======
    Array of (log)density values; the variance is ``self.rss / n`` with
    ``n`` the number of rows of ``self.model[1]``.
    """
    response_info = self.model[0].design_info
    predictor_info = self.model[1].design_info
    mat_y, mat_x = build_design_matrices(
        [response_info, predictor_info], data)

    pred_y = mat_x.dot(self.coef)
    n, _ = self.model[1].shape
    sig2 = self.rss / n  # MLE of variance

    residual = mat_y - pred_y
    out = -0.5 * np.log(2 * np.pi * sig2) - 0.5 * residual ** 2 / sig2
    if log:
        return out
    return np.exp(out)
def predict_proba(self, X):
    """
    Compute predicted probabilities.

    ``X`` is first converted to a design matrix with ``self.design_info``
    and then handed to the parent class's ``predict_proba``.

    Parameters
    ----------
    X : dict-like object
        Object to look up variables referenced in ``formula_like``.
        See `patsy.dmatrix`.

    Returns
    -------
    y_score : array-like
        Whatever the parent class's ``predict_proba`` returns for the
        design matrix.
    """
    (dmat_test,) = patsy.build_design_matrices([self.design_info], X)
    return super().predict_proba(dmat_test)
def predict(self, data, alpha=0.05):
    """
    Predict the response at ``data`` together with its standard error.

    Parameters
    ==========
    data
        data evaluated against the design info of ``self.model[1]``
    alpha
        accepted but not used by this method

    Returns
    =======
    Tuple ``(prediction, standard_error)``: the predicted values and the
    square root of ``sig2 * (1 + diag(M (X'X)^-1 M'))``, where ``M`` is the
    new design matrix, ``X`` is ``self.model[1]`` and
    ``sig2 = rss / (n - p)``.
    """
    X = self.model[1]
    n, p = X.shape

    (mat,) = build_design_matrices([X.design_info], data)

    sig2 = self.rss / (n - p)
    XTX_inv = np.linalg.inv(X.T.dot(X))
    leverage = mat.dot(XTX_inv).dot(mat.T).diagonal()
    var = sig2 * (1 + leverage)

    prediction = mat.dot(self.coef)
    return prediction, np.sqrt(var)
def predict(self, new_data):
    """Return the linear prediction ``X @ betas`` for ``new_data``.

    The design matrix ``X`` is rebuilt from ``self._x_design_info``.
    """
    matrices = build_design_matrices([self._x_design_info], new_data)
    (new_x,) = matrices
    return np.dot(new_x, self.betas)
def gp_predict(gp, x, design_info):
    """Call ``gp.predict`` for strain 'ura3' with paraquat set to 0.

    NOTE(review): nothing is returned -- ``mu`` and ``var`` are discarded.
    ``design_info`` is unused (``x.design_info`` is used instead), and
    ``time`` comes from the enclosing scope; presumably it has 50 entries
    to match the hard-coded repeats -- confirm.
    """
    repeats = 50
    strain_column = ['ura3'] * repeats
    paraquat_column = [0] * repeats
    predx = patsy.build_design_matrices(
        [x.design_info],
        {'time': time, 'Strain': strain_column, 'paraquat': paraquat_column},
    )[0]
    # The first design column is not passed to the GP.
    mu, var = gp.predict(predx[:, 1:])