def __init__(self, models, transforms=None, stats=None, algorithm=None):
    """
    Store the fitted per-record models and their associated metadata.

    Parameters
    ----------
    models:
        Collection of fitted models; kept as-is on ``_models``.

    transforms: optional, default = None
        Transformations to keep on ``_transforms``. When omitted, a new
        empty ``TransformList`` is created instead.

    stats: optional, default = None
        Fit statistics; kept as-is on ``_stats``.

    algorithm: optional, default = None
        Identifier of the fitting algorithm; kept as-is on ``_algorithm``.
    """
    self._models = models
    self._stats = stats
    self._algorithm = algorithm
    # Fall back to a fresh, empty list so _transforms is never None.
    self._transforms = TransformList() if transforms is None else transforms
def _prepare(self, X):
    """
    Build the estimator and transform list for a penalized fit.

    The design matrix is extended by stacking ``sqrt(self.c) * self.R``
    underneath it before it is handed to ``TikhonovPseudoInv``.

    Parameters
    ----------
    X: array
        Design matrix.
        NOTE(review): presumably ``self.R`` must have the same number of
        columns as ``X`` for the stacking to succeed -- confirm with caller.

    Returns
    -------
    (estimator, transforms): tuple
        The ``TikhonovPseudoInv`` estimator built from the augmented matrix
        and a new, empty ``TransformList``.
    """
    penaltyRows = sqrt(self.c) * self.R
    augmented = vstack([X, penaltyRows])
    solver = TikhonovPseudoInv(augmented, self.nPenalties, intercept=self._intercept)
    return solver, TransformList()
class RegressionModel(object):
    """
    Class for fitted regression models.

    Backed by an RDD of key-value pairs. The keys are the tuple identifiers
    from the Series of response variables used in the fit. The values are
    per-record regression models that contain the coefficients from the fit,
    though these models are not directly exposed.
    """

    def __init__(self, models, transforms=None, stats=None, algorithm=None):
        """
        Store the fitted models and their associated metadata.

        Parameters
        ----------
        models:
            Keyed collection of per-record fitted models.

        transforms: optional, default = None
            Transformations applied to a design matrix before prediction or
            scoring. A new empty ``TransformList`` is used when omitted.

        stats: optional, default = None
            Statistics from the original fit.

        algorithm: optional, default = None
            Identifier of the fitting algorithm (only used by ``__repr__``).
        """
        self._models = models
        if transforms is None:
            self._transforms = TransformList()
        else:
            self._transforms = transforms
        self._stats = stats
        self._algorithm = algorithm

    def __repr__(self):
        """
        Multi-line summary: class name, transformation names, algorithm.
        """
        lines = [self.__class__.__name__]
        if self._transforms.transforms is None:
            t = "None"
        else:
            t = ", ".join([str(x.__class__.__name__) for x in self._transforms.transforms])
        lines.append("transformations: " + t)
        lines.append("algorithm: " + str(self._algorithm))
        return "\n".join(lines)

    @property
    def coeffs(self):
        """
        Series containing the coefficients of the model.
        """
        # Built lazily on first access and cached on the instance.
        if not hasattr(self, "_coeffs"):
            self._coeffs = Series(self._models.mapValues(lambda v: v.betas))
        return self._coeffs

    @property
    def stats(self):
        """
        Series containing the R-squared values from the original fit of the
        model, or None when no statistics were stored.
        """
        if self._stats is None:
            return None
        return Series(self._stats, index="R2")

    def predict(self, X):
        """
        Predicts the responses given a design matrix

        Parameters
        ----------
        X: array
            Design matrix of shape n x k, where n is the number of samples
            and k is the number of regressors. Even if an intercept term was
            fit, should NOT include a column of ones.

        Returns
        -------
        yhat: Series
            Series of predictions (each of length n)
        """
        X = self._transforms.apply(X)
        return Series(self._models.mapValues(lambda v: v.predict(X)))

    def score(self, X, y):
        """
        Computes R-squared values for a single design matrix and multiple
        responses.

        Parameters
        ----------
        X: array
            Design matrix of shape n x k, where n is the number of samples
            and k is the number of regressors. Even if an intercept term was
            fit, should NOT include a column of ones.

        y: Series
            Series of response variables where each record is a vector of
            length n, where n is the number of samples.

        Returns
        -------
        scores: Series
            Series of R-squared values.
        """
        X = self._transforms.apply(X)
        joined = self._models.join(y.rdd)
        # Each joined value is a (model, response) pair. Index into it rather
        # than unpacking in the lambda signature: tuple parameters were
        # removed in Python 3 (PEP 3113) and are a SyntaxError there.
        newrdd = joined.mapValues(lambda pair: pair[0].stats(X, pair[1]))
        return Series(newrdd)

    def predictAndScore(self, X, y):
        """
        Computes predictions and fit statistics in a single pass.

        Parameters
        ----------
        X: array
            Design matrix; the stored transformations are applied to it
            before it is passed to the per-record models.

        y: Series
            Series of response variables; its records are joined to the
            models by key.

        Returns
        -------
        (yhat, stats): tuple of Series
            The first and second elements, respectively, of what each
            model's ``predictWithStats(X, y)`` returns.
        """
        X = self._transforms.apply(X)
        joined = self._models.join(y.rdd)
        # Indexing instead of a tuple parameter keeps this valid on Python 3.
        results = joined.mapValues(lambda pair: pair[0].predictWithStats(X, pair[1]))
        yhat = results.mapValues(lambda v: v[0])
        stats = results.mapValues(lambda v: v[1])
        return Series(yhat), Series(stats)
class RegressionModel(object):
    """
    Class for fitted regression models.

    Backed by an RDD of key-value pairs. The keys are the tuple identifiers
    from the Series of response variables used in the fit. The values are
    per-record regression models that contain the coefficients from the fit,
    though these models are not directly exposed.
    """

    def __init__(self, models, transforms=None, stats=None, algorithm=None):
        """
        Store the fitted models and their associated metadata.

        Parameters
        ----------
        models:
            Keyed collection of per-record fitted models.

        transforms: optional, default = None
            Transformations applied to a design matrix before prediction or
            scoring. A new empty ``TransformList`` is used when omitted.

        stats: optional, default = None
            Statistics from the original fit.

        algorithm: optional, default = None
            Identifier of the fitting algorithm (only used by ``__repr__``).
        """
        self._models = models
        if transforms is None:
            self._transforms = TransformList()
        else:
            self._transforms = transforms
        self._stats = stats
        self._algorithm = algorithm

    def __repr__(self):
        """
        Multi-line summary: class name, transformation names, algorithm.
        """
        lines = [self.__class__.__name__]
        if self._transforms.transforms is None:
            t = 'None'
        else:
            t = ', '.join([
                str(x.__class__.__name__) for x in self._transforms.transforms
            ])
        lines.append('transformations: ' + t)
        lines.append('algorithm: ' + str(self._algorithm))
        return '\n'.join(lines)

    @property
    def coeffs(self):
        """
        Series containing the coefficients of the model.
        """
        # Built lazily on first access and cached on the instance.
        if not hasattr(self, '_coeffs'):
            self._coeffs = Series(self._models.mapValues(lambda v: v.betas))
        return self._coeffs

    @property
    def stats(self):
        """
        Series containing the R-squared values from the original fit of the
        model, or None when no statistics were stored.
        """
        if self._stats is None:
            return None
        return Series(self._stats, index='R2')

    def predict(self, X):
        """
        Predicts the responses given a design matrix

        Parameters
        ----------
        X: array
            Design matrix of shape n x k, where n is the number of samples
            and k is the number of regressors. Even if an intercept term was
            fit, should NOT include a column of ones.

        Returns
        -------
        yhat: Series
            Series of predictions (each of length n)
        """
        X = self._transforms.apply(X)
        return Series(self._models.mapValues(lambda v: v.predict(X)))

    def score(self, X, y):
        """
        Computes R-squared values for a single design matrix and multiple
        responses.

        Parameters
        ----------
        X: array
            Design matrix of shape n x k, where n is the number of samples
            and k is the number of regressors. Even if an intercept term was
            fit, should NOT include a column of ones.

        y: Series
            Series of response variables where each record is a vector of
            length n, where n is the number of samples.

        Returns
        -------
        scores: Series
            Series of R-squared values.
        """
        X = self._transforms.apply(X)
        joined = self._models.join(y.rdd)
        # Each joined value is a (model, response) pair. Index into it rather
        # than unpacking in the lambda signature: tuple parameters were
        # removed in Python 3 (PEP 3113) and are a SyntaxError there.
        newrdd = joined.mapValues(lambda pair: pair[0].stats(X, pair[1]))
        return Series(newrdd)

    def predictAndScore(self, X, y):
        """
        Computes predictions and fit statistics in a single pass.

        Parameters
        ----------
        X: array
            Design matrix; the stored transformations are applied to it
            before it is passed to the per-record models.

        y: Series
            Series of response variables; its records are joined to the
            models by key.

        Returns
        -------
        (yhat, stats): tuple of Series
            The first and second elements, respectively, of what each
            model's ``predictWithStats(X, y)`` returns.
        """
        X = self._transforms.apply(X)
        joined = self._models.join(y.rdd)
        # Indexing instead of a tuple parameter keeps this valid on Python 3.
        results = joined.mapValues(lambda pair: pair[0].predictWithStats(X, pair[1]))
        yhat = results.mapValues(lambda v: v[0])
        stats = results.mapValues(lambda v: v[1])
        return Series(yhat), Series(stats)
def _prepare(self, X):
    """
    Build the estimator and transform list for an unpenalized fit.

    When ``self._intercept`` is truthy the design matrix is first passed
    through ``AddConstant().transform``; otherwise it is used unchanged.

    Parameters
    ----------
    X: array
        Design matrix.

    Returns
    -------
    (estimator, transforms): tuple
        The ``PseudoInv`` estimator built from the (possibly augmented)
        design matrix and a new, empty ``TransformList``.
    """
    design = AddConstant().transform(X) if self._intercept else X
    return PseudoInv(design), TransformList()