def __init__(self, X=None, y=None, theta=None, model_order=3):
    """Set up a polynomial Bayesian linear regression normative model.

    :param X: data matrix; a 1-d array is treated as a single covariate,
        otherwise the number of covariates is read from ``X.shape[1]``
    :param y: optional responses; only used when ``theta`` is also given
    :param theta: optional starting hyperparameters; replaced by zeros
        unless its length equals ``1 + D * model_order``
    :param model_order: order of the polynomial basis (``None`` means 3)
    :raises ValueError: if ``X`` is not supplied
    """
    if X is None:
        # Raise a real exception instance: raising a tuple is itself a
        # TypeError on Python 3 and hides the intended message.
        raise ValueError("Data matrix must be specified")

    self.D = 1 if len(X.shape) == 1 else X.shape[1]

    # Force a default value and check datatype
    model_order = 3 if model_order is None else int(model_order)

    self._n_params = 1 + self.D * model_order
    self._model_order = model_order

    print("initialising BLR ( order", model_order, ")")
    if (theta is None) or (len(theta) != self._n_params):
        print("Using default hyperparameters")
        self.theta0 = np.zeros(self._n_params)
    else:
        self.theta0 = theta
    self.theta = self.theta0

    if (theta is not None) and (y is not None):
        self.Phi = create_poly_basis(X, self._model_order)
        self.blr = BLR(theta, self.Phi, y)
    else:
        self.blr = BLR()
    # The regression object lives in ``self.blr``; ``self.gpr`` is kept as
    # an alias for any code that still reads the old attribute name.
    self.gpr = self.blr
def estimate(self, X, y, theta=None):
    """Fit the hyperparameters of the regression model.

    :param X: covariates; only used to build the polynomial basis when no
        basis has been stored on the instance yet
    :param y: responses; arrays with more than one dimension are flattened
    :param theta: starting hyperparameters (defaults to ``self.theta0``)
    :returns: the hyperparameters returned by ``BLR.estimate``, which are
        also stored in ``self.theta``
    """
    # Reuse a previously stored design matrix; build it only if missing.
    try:
        design = self.Phi
    except AttributeError:
        design = self.Phi = create_poly_basis(X, self._model_order)

    responses = y.ravel() if len(y.shape) > 1 else y
    start = self.theta0 if theta is None else theta

    self.blr = BLR(start, design, responses)
    self.theta = self.blr.estimate(start, design, responses)
    return self.theta
def estimate(self, X, y, **kwargs):
    """Fit the hyperparameters of the regression model.

    :param X: covariates; only used to build the polynomial basis when no
        basis has been stored on the instance yet
    :param y: responses; arrays with more than one dimension are flattened
    :param theta: (keyword) starting hyperparameters, defaulting to
        ``self.theta0``
    :returns: ``self``, with the fitted values stored in ``self.theta``
    """
    start = kwargs.pop('theta', None)

    # Reuse a previously stored design matrix; build it only if missing.
    try:
        design = self.Phi
    except AttributeError:
        design = self.Phi = create_poly_basis(X, self._model_order)

    responses = y.ravel() if len(y.shape) > 1 else y
    if start is None:
        start = self.theta0

    self.blr = BLR(start, design, responses, var_groups=self.var_groups)
    self.theta = self.blr.estimate(start, design, responses,
                                   optimizer=self.optim_alg)
    return self
class NormBLR(NormBase):
    """ Normative modelling based on Bayesian Linear Regression
    """

    def __init__(self, X=None, y=None, theta=None, model_order=3):
        """Set up the model dimensions and starting hyperparameters.

        :param X: data matrix; a 1-d array is treated as a single
            covariate, otherwise the count is read from ``X.shape[1]``
        :param y: optional responses; only used when ``theta`` is given
        :param theta: optional starting hyperparameters; replaced by zeros
            unless its length equals ``1 + D * model_order``
        :param model_order: order of the polynomial basis (``None`` -> 3)
        :raises ValueError: if ``X`` is not supplied
        """
        if X is None:
            # Raise a real exception instance: raising a tuple is itself a
            # TypeError on Python 3 and hides the intended message.
            raise ValueError("Data matrix must be specified")

        self.D = 1 if len(X.shape) == 1 else X.shape[1]

        # Force a default value and check datatype
        model_order = 3 if model_order is None else int(model_order)

        self._n_params = 1 + self.D * model_order
        self._model_order = model_order

        print("initialising BLR ( order", model_order, ")")
        if (theta is None) or (len(theta) != self._n_params):
            print("Using default hyperparameters")
            self.theta0 = np.zeros(self._n_params)
        else:
            self.theta0 = theta
        self.theta = self.theta0

        # Store the model as ``self.blr`` so that neg_log_lik and predict
        # find it even before estimate() has been called.
        if (theta is not None) and (y is not None):
            self.Phi = create_poly_basis(X, self._model_order)
            self.blr = BLR(theta, self.Phi, y)
        else:
            self.blr = BLR()
        # Alias for code that still reads the old attribute name.
        self.gpr = self.blr

    @property
    def n_params(self):
        """Number of hyperparameters (``1 + D * model_order``)."""
        return self._n_params

    @property
    def neg_log_lik(self):
        """The ``nlZ`` attribute of the underlying BLR object."""
        return self.blr.nlZ

    def estimate(self, X, y, theta=None):
        """Fit the hyperparameters of the regression model.

        :param X: covariates; only used to build the polynomial basis when
            no basis has been stored on the instance yet
        :param y: responses; multi-dimensional arrays are flattened
        :param theta: starting hyperparameters (default ``self.theta0``)
        :returns: the fitted hyperparameters, also stored in ``self.theta``
        """
        if not hasattr(self, 'Phi'):
            self.Phi = create_poly_basis(X, self._model_order)
        if len(y.shape) > 1:
            y = y.ravel()

        if theta is None:
            theta = self.theta0

        self.blr = BLR(theta, self.Phi, y)
        self.theta = self.blr.estimate(theta, self.Phi, y)

        return self.theta

    def predict(self, X, y, Xs, theta=None):
        """Predict at the test covariates ``Xs``.

        ``X`` is accepted for interface compatibility but is not read; the
        stored basis ``self.Phi`` is used instead, so it must already
        exist on the instance.

        :param y: training responses passed through to ``BLR.predict``
        :param Xs: test covariates, expanded with the polynomial basis
        :param theta: hyperparameters (default ``self.theta``)
        :returns: tuple ``(yhat, s2)`` as returned by ``BLR.predict``
        """
        if theta is None:
            theta = self.theta

        Phis = create_poly_basis(Xs, self._model_order)
        yhat, s2 = self.blr.predict(theta, self.Phi, y, Phis)

        return yhat, s2
def estimate(filename, maskfile, basis, ard=False, outputall=False,
             saveoutput=True):
    """ Estimate a trend surface model

    This will estimate a trend surface model, independently for each subject.
    This is currently fit using a polynomial model of a specified degree.
    The models are estimated on the basis of data stored on disk in ascii or
    neuroimaging data formats (currently nifti only). Ascii data should be in
    tab or space delimited format with the number of voxels in rows and the
    number of subjects in columns. Neuroimaging data will be reshaped
    into the appropriate format

    Basic usage::

        estimate(filename, maskfile, basis)

    where the variables are defined below.

    :param filename: 4-d nifti file containing the images to be estimated
    :param maskfile: nifti mask used to apply to the data
    :param basis: model order for the interpolating polynomial
    :param ard: if truthy, start from one hyperparameter per basis column
        plus one; otherwise two hyperparameters are used
    :param outputall: if truthy, also compute the square root of the
        diagonal of the inverse of each model's ``A`` matrix
        (``trendcoeffvar``)
    :param saveoutput: if truthy, write the outputs to the current working
        directory and return nothing; otherwise return them as a list

    :outputs: * yhat - predictive mean
              * ys2 - predictive variance
              * trendcoeff - coefficients from the trend surface model
              * negloglik - Negative log marginal likelihood
              * hyp - hyperparameters
              * explainedvar - explained variance
              * rmse - root mean squared error

    :returns: ``None`` when ``saveoutput`` is truthy, otherwise the list
        ``[yhat, ys2, nlZ, hyp, rmse, ev, m]`` with ``bs2`` appended when
        ``outputall`` is truthy
    """

    # load data
    print("Processing data in", filename)
    Y, X, mask = load_data(filename, maskfile)
    Y = np.round(10000 * Y) / 10000  # truncate precision to avoid numerical probs
    if len(Y.shape) == 1:
        Y = Y[:, np.newaxis]
    N = Y.shape[1]

    # standardize responses and covariates
    mY = np.mean(Y, axis=0)
    sY = np.std(Y, axis=0)
    Yz = (Y - mY) / sY
    mX = np.mean(X, axis=0)
    sX = np.std(X, axis=0)
    Xz = (X - mX) / sX

    # create basis set and set starting hyperparameters
    Phi = create_basis(Xz, basis, mask)
    # Use plain truthiness so the hyperparameter layout and the banner below
    # always agree (``ard is True`` rejected e.g. numpy booleans).
    if ard:
        hyp0 = np.zeros(Phi.shape[1] + 1)
        print('ARD is enabled')
    else:
        hyp0 = np.zeros(2)

    # estimate the models for all subjects
    yhat = np.zeros_like(Yz)
    ys2 = np.zeros_like(Yz)
    nlZ = np.zeros(N)
    hyp = np.zeros((N, len(hyp0)))
    rmse = np.zeros(N)
    ev = np.zeros(N)
    m = np.zeros((N, Phi.shape[1]))
    bs2 = np.zeros((N, Phi.shape[1]))
    for i in range(N):
        print("Estimating model ", i + 1, "of", N)
        breg = BLR()
        hyp[i, :] = breg.estimate(hyp0, Phi, Yz[:, i])
        m[i, :] = breg.m
        nlZ[i] = breg.nlZ

        # compute extra measures (e.g. marginal variances)?
        if outputall:
            bs2[i] = np.sqrt(np.diag(np.linalg.inv(breg.A)))

        # compute predictions and errors
        yhat[:, i], ys2[:, i] = breg.predict(hyp[i, :], Phi, Yz[:, i], Phi)
        # NOTE(review): yhat is mapped back to the original units here but
        # ys2 is left as returned by predict -- confirm this is intended.
        yhat[:, i] = yhat[:, i] * sY[i] + mY[i]
        rmse[i] = np.sqrt(np.mean((Y[:, i] - yhat[:, i])**2))
        ev[i] = 100 * (1 - (np.var(Y[:, i] - yhat[:, i]) / np.var(Y[:, i])))

        print("Variance explained =", ev[i], "% RMSE =", rmse[i])

    print("Mean (std) variance explained =", ev.mean(), "(", ev.std(), ")")
    print("Mean (std) RMSE =", rmse.mean(), "(", rmse.std(), ")")

    # Write output
    if saveoutput:
        print("Writing output ...")
        np.savetxt("trendcoeff.txt", m, delimiter='\t', fmt='%5.8f')
        np.savetxt("negloglik.txt", nlZ, delimiter='\t', fmt='%5.8f')
        np.savetxt("hyp.txt", hyp, delimiter='\t', fmt='%5.8f')
        np.savetxt("explainedvar.txt", ev, delimiter='\t', fmt='%5.8f')
        np.savetxt("rmse.txt", rmse, delimiter='\t', fmt='%5.8f')
        fileio.save_nifti(yhat, 'yhat.nii.gz', filename, mask)
        fileio.save_nifti(ys2, 'ys2.nii.gz', filename, mask)

        if outputall:
            np.savetxt("trendcoeffvar.txt", bs2, delimiter='\t', fmt='%5.8f')
    else:
        out = [yhat, ys2, nlZ, hyp, rmse, ev, m]
        if outputall:
            out.append(bs2)
        return out
def __init__(self, **kwargs):
    """Set up the model dimensions and starting hyperparameters.

    Recognised keyword arguments:

    :param X: data matrix (required); a 1-d array counts as one covariate
    :param y: optional responses; only used when ``theta`` is also given
    :param theta: optional starting hyperparameters; replaced by zeros
        unless its length equals ``n_alpha + n_beta``
    :param optimizer: stored as ``self.optim_alg`` (default ``'cg'``)
    :param configparam: polynomial model order; takes precedence over
        ``model_order``
    :param model_order: polynomial model order (default 1)
    :param var_groups: path to a file of variance group labels, read with
        ``pd.read_pickle`` for ``.pkl`` files and ``np.loadtxt`` otherwise
    :param use_ard: if truthy, ``n_alpha`` is ``D * model_order`` instead
        of 1
    :raises ValueError: if ``X`` is not supplied
    """
    X = kwargs.pop('X', None)
    y = kwargs.pop('y', None)
    theta = kwargs.pop('theta', None)
    self.optim_alg = kwargs.pop('optimizer', 'cg')

    if X is None:
        # Raise a real exception instance: raising a tuple is itself a
        # TypeError on Python 3 and hides the intended message.
        raise ValueError("Data matrix must be specified")

    self.D = 1 if len(X.shape) == 1 else X.shape[1]

    # Parse model order ('configparam' wins over 'model_order'). kwargs is
    # always a dict here, so no None check is needed.
    if 'configparam' in kwargs:
        model_order = kwargs.pop('configparam')
    else:
        model_order = kwargs.pop('model_order', 1)

    # Force a default value and check datatype
    model_order = 1 if model_order is None else int(model_order)

    if 'var_groups' in kwargs:
        var_groups_file = kwargs.pop('var_groups')
        if var_groups_file.endswith('.pkl'):
            # NOTE: unpickling can execute arbitrary code; only load
            # variance group files from trusted sources.
            self.var_groups = pd.read_pickle(var_groups_file)
        else:
            self.var_groups = np.loadtxt(var_groups_file)
        # one noise hyperparameter per distinct group label
        n_beta = len(set(self.var_groups))
    else:
        self.var_groups = None
        n_beta = 1

    # are we using ARD?
    self.use_ard = kwargs.pop('use_ard', False)
    n_alpha = self.D * model_order if self.use_ard else 1

    self._n_params = n_alpha + n_beta
    self._model_order = model_order

    print("initialising BLR ( order", model_order, ")")
    if (theta is None) or (len(theta) != self._n_params):
        print("Using default hyperparameters")
        self.theta0 = np.zeros(self._n_params)
    else:
        self.theta0 = theta
    self.theta = self.theta0

    if (theta is not None) and (y is not None):
        self.Phi = create_poly_basis(X, self._model_order)
        self.blr = BLR(theta, self.Phi, y)
    else:
        self.blr = BLR()
class NormBLR(NormBase):
    """ Normative modelling based on Bayesian Linear Regression
    """

    def __init__(self, **kwargs):
        """Set up the model dimensions and starting hyperparameters.

        Recognised keyword arguments:

        :param X: data matrix (required); a 1-d array counts as one
            covariate
        :param y: optional responses; only used when ``theta`` is given
        :param theta: optional starting hyperparameters; replaced by zeros
            unless its length equals ``n_alpha + n_beta``
        :param optimizer: passed to ``BLR.estimate`` (default ``'cg'``)
        :param configparam: polynomial model order; takes precedence over
            ``model_order``
        :param model_order: polynomial model order (default 1)
        :param var_groups: path to a file of variance group labels, read
            with ``pd.read_pickle`` for ``.pkl`` files and ``np.loadtxt``
            otherwise
        :param use_ard: if truthy, ``n_alpha`` is ``D * model_order``
            instead of 1
        :raises ValueError: if ``X`` is not supplied
        """
        X = kwargs.pop('X', None)
        y = kwargs.pop('y', None)
        theta = kwargs.pop('theta', None)
        self.optim_alg = kwargs.pop('optimizer', 'cg')

        if X is None:
            # Raise a real exception instance: raising a tuple is itself a
            # TypeError on Python 3 and hides the intended message.
            raise ValueError("Data matrix must be specified")

        self.D = 1 if len(X.shape) == 1 else X.shape[1]

        # Parse model order ('configparam' wins over 'model_order').
        # kwargs is always a dict here, so no None check is needed.
        if 'configparam' in kwargs:
            model_order = kwargs.pop('configparam')
        else:
            model_order = kwargs.pop('model_order', 1)

        # Force a default value and check datatype
        model_order = 1 if model_order is None else int(model_order)

        if 'var_groups' in kwargs:
            var_groups_file = kwargs.pop('var_groups')
            if var_groups_file.endswith('.pkl'):
                # NOTE: unpickling can execute arbitrary code; only load
                # variance group files from trusted sources.
                self.var_groups = pd.read_pickle(var_groups_file)
            else:
                self.var_groups = np.loadtxt(var_groups_file)
            # one noise hyperparameter per distinct group label
            n_beta = len(set(self.var_groups))
        else:
            self.var_groups = None
            n_beta = 1

        # are we using ARD?
        self.use_ard = kwargs.pop('use_ard', False)
        n_alpha = self.D * model_order if self.use_ard else 1

        self._n_params = n_alpha + n_beta
        self._model_order = model_order

        print("initialising BLR ( order", model_order, ")")
        if (theta is None) or (len(theta) != self._n_params):
            print("Using default hyperparameters")
            self.theta0 = np.zeros(self._n_params)
        else:
            self.theta0 = theta
        self.theta = self.theta0

        if (theta is not None) and (y is not None):
            self.Phi = create_poly_basis(X, self._model_order)
            self.blr = BLR(theta, self.Phi, y)
        else:
            self.blr = BLR()

    @property
    def n_params(self):
        """Number of hyperparameters (``n_alpha + n_beta``)."""
        return self._n_params

    @property
    def neg_log_lik(self):
        """The ``nlZ`` attribute of the underlying BLR object."""
        return self.blr.nlZ

    def estimate(self, X, y, **kwargs):
        """Fit the hyperparameters of the regression model.

        :param X: covariates; only used to build the polynomial basis when
            no basis has been stored on the instance yet
        :param y: responses; multi-dimensional arrays are flattened
        :param theta: (keyword) starting hyperparameters, defaulting to
            ``self.theta0``
        :returns: ``self``, with the fitted values in ``self.theta``
        """
        theta = kwargs.pop('theta', None)
        if not hasattr(self, 'Phi'):
            self.Phi = create_poly_basis(X, self._model_order)
        if len(y.shape) > 1:
            y = y.ravel()

        if theta is None:
            theta = self.theta0

        self.blr = BLR(theta, self.Phi, y, var_groups=self.var_groups)
        self.theta = self.blr.estimate(theta, self.Phi, y,
                                       optimizer=self.optim_alg)

        return self

    def predict(self, Xs, X, y, **kwargs):
        """Predict at the test covariates ``Xs``.

        ``X`` is accepted for interface compatibility but is not read; the
        stored basis ``self.Phi`` is used instead, so it must already
        exist on the instance.

        :param Xs: test covariates, expanded with the polynomial basis
        :param y: training responses passed through to ``BLR.predict``
        :param theta: (keyword) hyperparameters, default ``self.theta``
        :param var_groups_test: (keyword) path to the test variance group
            labels, read like ``var_groups`` in the constructor
        :returns: tuple ``(yhat, s2)`` as returned by ``BLR.predict``
        """
        theta = kwargs.pop('theta', None)
        if theta is None:
            theta = self.theta

        Phis = create_poly_basis(Xs, self._model_order)

        if 'var_groups_test' in kwargs:
            var_groups_test_file = kwargs.pop('var_groups_test')
            if var_groups_test_file.endswith('.pkl'):
                # NOTE: unpickling can execute arbitrary code; only load
                # variance group files from trusted sources.
                var_groups_te = pd.read_pickle(var_groups_test_file)
            else:
                var_groups_te = np.loadtxt(var_groups_test_file)
        else:
            var_groups_te = None

        yhat, s2 = self.blr.predict(theta, self.Phi, y, Phis,
                                    var_groups_test=var_groups_te)

        return yhat, s2