def _predict(self, data):
    """Predict using the trained MASS learner
    """
    try:
        output = r.predict(self._R_model, data,
                           **self._kwargs_predict)
        # TODO: access everything computed, and assign to
        #       ca's: res.names
        classes = Rrx2(output, 'class')
        # TODO: move to a helper function to be used generically
        if classes.rclass[0] == 'factor':
            classes = [int(classes.levels[i - 1]) for i in classes]
        if 'posterior' in output.names:
            self.ca.posterior = np.asarray(Rrx2(output, 'posterior'))
        res = np.asarray(classes)
    except Exception as e:
        raise FailedToPredictError(
            "Failed to predict %s on data of shape %s. Got '%s' during"
            " call to predict()." % (self, data.shape, e))
    return res
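# A possible shape for the generic helper flagged in the TODO above -- a
# sketch only, not part of the original source. It assumes rpy2's
# FactorVector API, where iterating over a factor yields 1-based indices
# into the vector's .levels attribute:
#
#   def _r_classes_to_labels(classes):
#       """Convert an R 'factor' result into a list of integer labels."""
#       if classes.rclass[0] == 'factor':
#           return [int(classes.levels[i - 1]) for i in classes]
#       return list(classes)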
def _predict(self, data):
    """Predict the output for the provided data.
    """
    # predict with the lowest Cp step
    try:
        res = r.predict(self.__trained_model,
                        data,
                        mode='step',
                        s=self.__lowest_Cp_step)
        fit = np.atleast_1d(Rrx2(res, 'fit'))
    except RRuntimeError as e:
        raise FailedToPredictError(
            "Failed to predict on %s using %s. Exception was: %s"
            % (data, self, e))

    self.ca.estimates = fit  # charge conditional attribute
    return fit
def _predict(self, data):
    """Predict the output for the provided data.
    """
    # predict with the final state (i.e., the last step)
    try:
        res = r.predict(self.__trained_model,
                        data,
                        mode='step',
                        type='fit',
                        s=rpy2.robjects.IntVector(self.__beta_pure_shape))
        fit = np.asanyarray(Rrx2(res, 'fit'))[:, -1]
    except RRuntimeError as e:
        raise FailedToPredictError(
            "Failed to predict on %s using %s. Exception was: %s"
            % (data, self, e))

    if len(fit.shape) == 0:
        # we got a scalar for a single sample -- make it a 1-element array
        fit = fit.reshape((1,))

    self.ca.estimates = fit  # charge conditional attribute
    return fit
class LARS(Classifier):
    """Least angle regression (LARS).

    LARS is the model selection algorithm from:

      Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani,
      Least Angle Regression, Annals of Statistics (with discussion)
      (2004) 32(2), 407-499.

      A new method for variable subset selection, with the lasso and
      'epsilon' forward stagewise methods as special cases.

    Similar to SMLR, it performs feature selection while performing
    classification, but instead of starting with all features, it starts
    with none and adds them in, which is similar to boosting.

    This learner behaves more like a ridge regression in that it returns
    prediction values and it treats the training labels as continuous.

    In the true nature of the PyMVPA framework, this algorithm is
    actually implemented in R by Trevor Hastie and wrapped via RPy.
    To make use of LARS, you must have R and RPy installed as well as
    the LARS contributed package. You can install R and RPy with the
    following command on Debian-based machines:

      sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the LARS package by running R as root and
    calling:

      install.packages()
    """

    # XXX from yoh: it is linear, isn't it?
    __tags__ = ['lars', 'regression', 'linear', 'has_sensitivity',
                'does_feature_selection', 'rpy2']

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
        """Initialize LARS.

        See the help in R for further details on the following parameters:

        Parameters
        ----------
        model_type : str
          Type of LARS to run. Can be one of ('lasso', 'lar',
          'forward.stagewise', 'stepwise').
        trace : bool
          Whether to print progress in R as it works.
        normalize : bool
          Whether to normalize the L2 Norm.
        intercept : bool
          Whether to add a non-penalized intercept to the model.
        max_steps : None or int
          If not None, specify the total number of iterations to run.
          Each iteration adds a feature, but leaving it None will add
          features until convergence.
        use_Gram : bool
          Whether to compute the Gram matrix (this should be False if
          you have more features than samples).
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if model_type not in known_models:
            raise ValueError("Unknown model %s for LARS is specified. "
                             "Known are %s" % (model_type, known_models))

        # set up the params
        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        # pylint friendly initializations
        self.__lowest_Cp_step = None
        self.__weights = None
        """The beta weights for each feature."""
        self.__trained_model = None
        """The model object after training that will be used for
        predictions."""

    def __repr__(self):
        """String summary of the object
        """
        return "LARS(type='%s', normalize=%s, intercept=%s, trace=%s, " \
               "max_steps=%s, use_Gram=%s, enable_ca=%s)" % \
               (self.__type,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                self.__use_Gram,
                str(self.ca.enabled))

    @due.dcite(Doi('10.1214/009053604000000067'),
               path="mvpa2.clfs.lars:LARS",
               description="Least angle regression",
               tags=["implementation"])
    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        targets = data.sa[self.get_space()].value[:, np.newaxis]
        # some non-Python friendly R-lars arguments
        lars_kwargs = {'use.Gram': self.__use_Gram}
        if self.__max_steps is not None:
            lars_kwargs['max.steps'] = self.__max_steps

        trained_model = r.lars(data.samples,
                               targets,
                               type=self.__type,
                               normalize=self.__normalize,
                               intercept=self.__intercept,
                               trace=self.__trace,
                               **lars_kwargs)

        # find the step with the lowest Cp (risk);
        # it is often the last step if you set max_steps.
        # Must first convert the R result to an array
        Cp_vals = None
        try:
            Cp_vals = np.asanyarray(Rrx2(trained_model, 'Cp'))
        except TypeError as e:
            raise FailedToTrainError(
                "Failed to train %s on %s. Got '%s' while trying to access "
                "trained model %s" % (self, data, e, trained_model))

        if Cp_vals is None:
            # if there were none -- just choose the 0th
            lowest_Cp_step = 0
        elif np.isnan(Cp_vals[0]):
            # sometimes may come back nan, so just pick the last one
            lowest_Cp_step = len(Cp_vals) - 1
        else:
            # determine the lowest
            lowest_Cp_step = Cp_vals.argmin()

        self.__lowest_Cp_step = lowest_Cp_step
        # set the weights to the lowest Cp step
        self.__weights = np.asanyarray(
            Rrx2(trained_model, 'beta'))[lowest_Cp_step]

        self.__trained_model = trained_model  # bind to the instance
class ENET(Classifier):
    """Elastic-Net regression (ENET) `Classifier`.

    Elastic-Net is the model selection algorithm from:

      :ref:`Zou and Hastie (2005) <ZH05>` 'Regularization and Variable
      Selection via the Elastic Net' Journal of the Royal Statistical
      Society, Series B, 67, 301-320.

    Similar to SMLR, it performs feature selection while performing
    classification, but instead of starting with all features, it starts
    with none and adds them in, which is similar to boosting.

    Unlike LARS it has both L1 and L2 regularization (instead of just
    L1). This means that while it tries to sparsify the features, it
    also tries to keep redundant features, which may be very beneficial
    for fMRI classification.

    In the true nature of the PyMVPA framework, this algorithm was
    actually implemented in R by Zou and Hastie and wrapped via RPy.
    To make use of ENET, you must have R and RPy installed as well as
    both the lars and elasticnet contributed packages. You can install
    R and RPy with the following command on Debian-based machines:

      sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the lars and elasticnet packages by running R
    as root and calling:

      install.packages()
    """

    __tags__ = ['enet', 'regression', 'linear', 'has_sensitivity',
                'does_feature_selection', 'rpy2']

    def __init__(self, lm=1.0, trace=False, normalize=True,
                 intercept=True, max_steps=None, **kwargs):
        """Initialize ENET.

        See the help in R for further details on the following parameters:

        Parameters
        ----------
        lm : float
          Penalty parameter. 0 will perform LARS with no ridge
          regression. Default is 1.0.
        trace : bool
          Whether to print progress in R as it works.
        normalize : bool
          Whether to normalize the L2 Norm.
        intercept : bool
          Whether to add a non-penalized intercept to the model.
        max_steps : None or int
          If not None, specify the total number of iterations to run.
          Each iteration adds a feature, but leaving it None will add
          features until convergence.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # set up the params
        self.__lm = lm
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps

        # pylint friendly initializations
        self.__weights = None
        """The beta weights for each feature."""
        self.__trained_model = None
        """The model object after training that will be used for
        predictions."""

        # It does not make sense to calculate a confusion matrix for a
        # regression
        self.ca.enable('training_stats', False)

    def __repr__(self):
        """String summary of the object
        """
        return "ENET(lm=%s, normalize=%s, intercept=%s, trace=%s, " \
               "max_steps=%s, enable_ca=%s)" % \
               (self.__lm,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                str(self.ca.enabled))

    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        targets = data.sa[self.get_space()].value[:, np.newaxis]
        enet_kwargs = {}
        if self.__max_steps is not None:
            enet_kwargs['max.steps'] = self.__max_steps

        try:
            self.__trained_model = trained_model = \
                r.enet(data.samples,
                       targets,
                       self.__lm,
                       normalize=self.__normalize,
                       intercept=self.__intercept,
                       trace=self.__trace,
                       **enet_kwargs)
        except RRuntimeError as e:
            raise FailedToTrainError(
                "Failed to train on %s using %s. Exception was: %s"
                % (data, self, e))

        # set the weights to the last step
        beta_pure = np.asanyarray(Rrx2(trained_model, 'beta.pure'))
        self.__beta_pure_shape = beta_pure.shape
        self.__weights = np.zeros(data.nfeatures, dtype=beta_pure.dtype)
        ind = np.asanyarray(Rrx2(trained_model, 'allset')) - 1
        self.__weights[ind] = beta_pure[-1, :]
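# A minimal usage sketch for ENET -- illustration only, not part of the
# original source. It assumes R + rpy2 with both the 'lars' and
# 'elasticnet' packages installed:
#
#   import numpy as np
#   from mvpa2.datasets import dataset_wizard
#   ds = dataset_wizard(samples=np.random.randn(20, 5),
#                       targets=np.random.randn(20))
#   clf = ENET(lm=0.5)               # ridge penalty; lm=0 gives plain LARS
#   clf.train(ds)
#   fit = clf.predict(ds.samples)    # predictions from the last enet step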
class _GLMNET(Classifier):
    """GLM-Net regression (GLMNET) `Classifier`.

    GLM-Net is the model selection algorithm from:

      Friedman, J., Hastie, T. and Tibshirani, R. (2008) Regularization
      Paths for Generalized Linear Models via Coordinate Descent.
      http://www-stat.stanford.edu/~hastie/Papers/glmnet.pdf

    To make use of GLMNET, you must have R and RPy2 installed as well
    as the glmnet contributed package. You can install R and RPy2 with
    the following command on Debian-based machines::

      sudo aptitude install python-rpy2 r-base-dev

    You can then install the glmnet package by running R as root and
    calling::

      install.packages()
    """

    __tags__ = ['glmnet', 'linear', 'has_sensitivity',
                'does_feature_selection', 'rpy2']

    family = Parameter('gaussian',
                       constraints=EnsureChoice('gaussian', 'multinomial'),
                       ro=True,
                       doc="""Response type of your targets (either
                       'gaussian' for regression or 'multinomial' for
                       classification).""")

    alpha = Parameter(1.0,
                      constraints=EnsureFloat() & EnsureRange(min=0.01,
                                                              max=1.0),
                      doc="""The elastic net mixing parameter. Larger
                      values will give rise to less L2 regularization,
                      with alpha=1.0 as a true LASSO penalty.""")

    nlambda = Parameter(100,
                        constraints=EnsureInt() & EnsureRange(min=1),
                        doc="""Maximum number of lambdas to calculate
                        before stopping if not converged.""")

    standardize = Parameter(True,
                            constraints='bool',
                            doc="""Whether to standardize the variables
                            prior to fitting.""")

    thresh = Parameter(1e-4,
                       constraints=EnsureFloat() & EnsureRange(min=1e-10,
                                                               max=1.0),
                       doc="""Convergence threshold for coordinate
                       descent.""")

    pmax = Parameter(None,
                     constraints=((EnsureInt() & EnsureRange(min=1))
                                  | EnsureNone()),
                     doc="""Limit the maximum number of variables ever
                     to be nonzero.""")

    maxit = Parameter(100,
                      constraints=EnsureInt() & EnsureRange(min=10),
                      doc="""Maximum number of outer-loop iterations for
                      'multinomial' families.""")

    model_type = Parameter('covariance',
                           constraints=EnsureChoice('covariance', 'naive'),
                           doc="""'covariance' saves all inner-products
                           ever computed and can be much faster than
                           'naive'. The latter can be more efficient for
                           nfeatures>>nsamples situations.""")

    def __init__(self, **kwargs):
        """Initialize GLM-Net.

        See the help in R for further details on the parameters.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # pylint friendly initializations
        self._utargets = None
        self.__weights = None
        """The beta weights for each feature."""
        self.__trained_model = None
        """The model object after training that will be used for
        predictions."""
        self.__last_lambda = None
        """Lambda obtained on the last step."""

    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # process targets based on the model family
        targets = dataset.sa[self.get_space()].value
        if self.params.family == 'gaussian':
            # regression: keep the targets as they are
            self._utargets = None
        elif self.params.family == 'multinomial':
            # classification: turn labels into one-of-M indicators
            targets_unique = dataset.sa[self.get_space()].unique
            targets = _label2oneofm(targets, targets_unique)
            # save some properties of the data/classification
            self._utargets = targets_unique.copy()

        # process the pmax
        if self.params.pmax is None:
            # set it to the number of features
            pmax = dataset.nfeatures
        else:
            # use the value
            pmax = self.params.pmax

        try:
            self.__trained_model = trained_model = \
                r.glmnet(dataset.samples,
                         targets,
                         family=self.params.family,
                         alpha=self.params.alpha,
                         nlambda=self.params.nlambda,
                         standardize=self.params.standardize,
                         thresh=self.params.thresh,
                         pmax=pmax,
                         maxit=self.params.maxit,
                         type=self.params.model_type)
        except RRuntimeError as e:
            raise FailedToTrainError(
                "Failed to train %s on %s. Got '%s' during call to "
                "r.glmnet()." % (self, dataset, e))

        self.__last_lambda = last_lambda = \
            np.asanyarray(Rrx2(trained_model, 'lambda'))[-1]

        # set the weights to the last step
        weights = r.coef(trained_model, s=last_lambda)
        if self.params.family == 'multinomial':
            self.__weights = np.hstack(
                [np.array(r['as.matrix'](weight))[1:]
                 for weight in weights])
        elif self.params.family == 'gaussian':
            self.__weights = np.array(r['as.matrix'](weights))[1:, 0]
        else:
            raise NotImplementedError(
                "Somehow managed to get here with family %s."
                % (self.params.family,))