def load(self, beta):
    """Generate the simulated data.

    Parameters
    ----------
    beta : Numpy array (p-by-1). The regression vector to generate data
            from.

    Returns
    -------
    X : Numpy array. The generated data matrix.

    y : Numpy array. The generated response vector.

    beta : Numpy array (p-by-1). The regression vector, rescaled to match
            the requested snr (if any).
    """
    if self.snr is not None:
        # Temporarily disable snr so the recursive call below does not
        # itself attempt to rescale beta.
        old_snr = self.snr
        self.snr = None
        try:
            def f(x):
                # Signed difference between the snr achieved with beta
                # scaled by x and the requested snr.
                X, y, _ = self.load(x * beta)

                return (np.linalg.norm(np.dot(X, x * beta))
                        / np.linalg.norm(self.e)) - old_snr

            snr = utils.bisection_method(f, low=0.0, high=np.sqrt(old_snr),
                                         maxiter=30)
        finally:
            # Always restore the snr, even if the bisection raises.
            self.snr = old_snr

        beta = beta * snr

    # Sum of the negative penalty gradients at beta. An intercept
    # variable (first element), if present, is not penalised.
    grad = 0.0
    for p in self.penalties:
        grad -= p.grad(beta[1:, :] if self.intercept else beta)

    Mte = np.dot(self.X0.T, self.e)
    if self.intercept:
        Mte = Mte[1:, :]

    # Per-column scaling factors that make the scaled X satisfy the
    # optimality conditions of the penalised problem.
    alpha = np.divide(grad, Mte)

    p = beta.shape[0]
    start = 1 if self.intercept else 0
    X = np.ones(self.X0.shape)
    for i in xrange(start, p):
        X[:, i] = self.X0[:, i] * alpha[i - start, 0]

    y = np.dot(X, beta) - self.e

    return X, y, beta
def load(l, k, g, beta, M, e, A, snr=None, intercept=False):
    """Returns data generated such that we know the exact solution.

    The data generated by this function is fit to the Linear regression +
    L1 + L2 + Total variation function, i.e. to:

        f(b) = (1 / 2).|Xb - y|² + l.|b|_1 + (k / 2).|b|² + g.TV(b),

    where |.|_1 is the L1 norm, |.|² is the squared L2 norm and TV is the
    total variation penalty.

    Parameters
    ----------
    l : The L1 regularisation parameter.

    k : The L2 regularisation parameter.

    g : The total variation regularisation parameter.

    beta : The regression vector to generate data from.

    M : The matrix to use when building data. This matrix carries the
            desired correlation structure of the generated data. The
            generated data will be a column-scaled version of this matrix.

    e : The error vector e = Xb - y. This vector carries the desired
            distribution of the residual.

    A : The linear operator for the Nesterov function.

    snr : Signal-to-noise ratio between model and residual.

    intercept : Boolean. Whether or not to include an intercept variable.
            This variable is not penalised. Note that if intercept is True,
            then e will be centred.

    Returns
    -------
    X : The generated X matrix.

    y : The generated y vector.

    beta : The regression vector with the correct snr.
    """
    l = float(l)
    k = float(k)
    g = float(g)

    if intercept:
        # Centre the residual so that the intercept can absorb its mean.
        e = e - np.mean(e)

    if snr is not None:
        def f(x):
            # Signed difference between the snr achieved with beta scaled
            # by x and the requested snr.
            X, y = _generate(l, k, g, x * beta, M, e, A, intercept)

            return (np.linalg.norm(np.dot(X, x * beta))
                    / np.linalg.norm(e)) - snr

        snr = bisection_method(f, low=0.0, high=np.sqrt(snr), maxiter=30)

        beta = beta * snr

    X, y = _generate(l, k, g, beta, M, e, A, intercept)

    return X, y, beta
def load(l, k, g, beta, M, e, A, mu, snr=None, intercept=False):
    """Returns data generated such that we know the exact solution.

    The data generated by this function is fit to the Linear regression +
    L1 + L2 + Smoothed group lasso function, i.e.:

        f(b) = (1 / 2).|Xb - y|² + l.|b|_1 + (k / 2).|b|² + g.GLmu(b),

    where |.|_1 is the L1 norm, |.|² is the squared L2 norm and GLmu is the
    smoothed group lasso penalty.

    Parameters
    ----------
    l : Non-negative float. The L1 regularisation parameter.

    k : Non-negative float. The L2 regularisation parameter.

    g : Non-negative float. The group lasso regularisation parameter.

    beta : Numpy array (p-by-1). The regression vector to generate data
            from.

    M : Numpy array (n-by-p). The matrix to use when building data. This
            matrix carries the desired correlation structure of the
            generated data. The generated data will be a column-scaled
            version of this matrix.

    e : Numpy array (n-by-1). The error vector e = Xb - y. This vector
            carries the desired distribution of the residual.

    A : Numpy or (usually) scipy.sparse array (K-by-p). The linear
            operator for the Nesterov function.

    mu : Non-negative float. The Nesterov smoothing regularisation
            parameter.

    snr : Positive float. Signal-to-noise ratio between model and
            residual.

    intercept : Boolean. Whether or not to include an intercept variable.
            This variable is not penalised. Note that if intercept is True,
            then e will be centred.

    Returns
    -------
    X : Numpy array (n-by-p). The generated X matrix.

    y : Numpy array (n-by-1). The generated y vector.

    beta : Numpy array (p-by-1). The regression vector with the correct
            snr.
    """
    l = float(l)
    k = float(k)
    g = float(g)

    if intercept:
        # Centre the residual so that the intercept can absorb its mean.
        e = e - np.mean(e)

    if snr is not None:
        def f(x):
            # Signed difference between the snr achieved with beta scaled
            # by x and the requested snr.
            X, y = _generate(l, k, g, x * beta, M, e, A, mu, intercept)

            return (np.linalg.norm(np.dot(X, x * beta))
                    / np.linalg.norm(e)) - snr

        snr = bisection_method(f, low=0.0, high=np.sqrt(snr), maxiter=30)

        beta = beta * snr

    X, y = _generate(l, k, g, beta, M, e, A, mu, intercept)

    return X, y, beta
def load(l, k, g, beta, M, e, mu, snr=None, shape=None):
    """Returns data generated such that we know the exact solution.

    The data generated by this function is fit to the Linear regression +
    L1 + L2 + Smoothed total variation function, i.e.:

        f(b) = (1 / 2).|Xb - y|² + l.L1mu(b) + (k / 2).|b|² + g.TVmu(b),

    where L1mu is the smoothed L1 norm, |.|² is the squared L2 norm and
    TVmu is the smoothed total variation penalty.

    Parameters
    ----------
    l : The L1 regularisation parameter.

    k : The L2 regularisation parameter.

    g : The total variation regularisation parameter.

    beta : The regression vector to generate data from.

    M : The matrix to use when building data. This matrix carries the
            desired correlation structure of the generated data. The
            generated data will be a column-scaled version of this matrix.

    e : The error vector e = Xb - y. This vector carries the desired
            distribution of the residual.

    mu : The Nesterov smoothing regularisation parameter.

    snr : Signal-to-noise ratio between model and residual.

    shape : The underlying dimension of the regression vector, beta. E.g.
            the beta may represent an underlying 3D image. In that case
            the shape is a three-tuple with dimensions (Z, Y, X). If shape
            is not provided, the shape is set to (p,) where p is the
            dimension of beta.

    Returns
    -------
    X : The generated X matrix.

    y : The generated y vector.

    beta : The regression vector with the correct snr.
    """
    l = float(l)
    k = float(k)
    g = float(g)

    if shape is None:
        # Default to a flat 1D "image" of the same length as beta.
        shape = (beta.shape[0],)

    if snr is not None:
        def f(x):
            # Signed difference between the snr achieved with beta scaled
            # by x and the requested snr.
            X, y = _generate(l, k, g, x * beta, M, e, mu, shape)

            return (np.linalg.norm(np.dot(X, x * beta))
                    / np.linalg.norm(e)) - snr

        snr = bisection_method(f, low=0.0, high=np.sqrt(snr), maxiter=30)

        beta = beta * snr

    X, y = _generate(l, k, g, beta, M, e, mu, shape)

    return X, y, beta
def load(l, k, g, beta, M, e, mu, snr=None, shape=None):
    """Returns data generated such that we know the exact solution.

    The data generated by this function is fit to the Linear regression +
    L1 + L2 + Smoothed total variation function, i.e.:

        f(b) = (1 / 2).|Xb - y|² + l.L1mu(b) + (k / 2).|b|² + g.TVmu(b),

    where L1mu is the smoothed L1 norm, |.|² is the squared L2 norm and
    TVmu is the smoothed total variation penalty.

    Parameters
    ----------
    l : The L1 regularisation parameter.

    k : The L2 regularisation parameter.

    g : The total variation regularisation parameter.

    beta : The regression vector to generate data from.

    M : The matrix to use when building data. This matrix carries the
            desired correlation structure of the generated data. The
            generated data will be a column-scaled version of this matrix.

    e : The error vector e = Xb - y. This vector carries the desired
            distribution of the residual.

    mu : The Nesterov smoothing regularisation parameter.

    snr : Signal-to-noise ratio between model and residual.

    shape : The underlying dimension of the regression vector, beta. E.g.
            the beta may represent an underlying 3D image. In that case
            the shape is a three-tuple with dimensions (Z, Y, X). If shape
            is not provided, the shape is set to (p,) where p is the
            dimension of beta.

    Returns
    -------
    X : The generated X matrix.

    y : The generated y vector.

    beta : The regression vector with the correct snr.
    """
    l = float(l)
    k = float(k)
    g = float(g)

    if shape is None:
        # Default to a flat 1D "image" of the same length as beta.
        shape = (beta.shape[0],)

    if snr is not None:
        def f(x):
            # Signed difference between the snr achieved with beta scaled
            # by x and the requested snr.
            X, y = _generate(l, k, g, x * beta, M, e, mu, shape)

            return (np.linalg.norm(np.dot(X, x * beta))
                    / np.linalg.norm(e)) - snr

        snr = bisection_method(f, low=0.0, high=np.sqrt(snr), maxiter=30)

        beta = beta * snr

    X, y = _generate(l, k, g, beta, M, e, mu, shape)

    return X, y, beta
def load(l, k, g, beta, M, e, A, snr=None, intercept=False):
    """Returns data generated such that we know the exact solution.

    The data generated by this function is fit to the Linear regression +
    L1 + L2 + Total variation function, i.e. to:

        f(b) = (1 / 2).|Xb - y|² + l.|b|_1 + (k / 2).|b|² + g.TV(b),

    where |.|_1 is the L1 norm, |.|² is the squared L2 norm and TV is the
    total variation penalty.

    Parameters
    ----------
    l : The L1 regularisation parameter.

    k : The L2 regularisation parameter.

    g : The total variation regularisation parameter.

    beta : The regression vector to generate data from.

    M : The matrix to use when building data. This matrix carries the
            desired correlation structure of the generated data. The
            generated data will be a column-scaled version of this matrix.

    e : The error vector e = Xb - y. This vector carries the desired
            distribution of the residual.

    A : The linear operator for the Nesterov function.

    snr : Signal-to-noise ratio between model and residual.

    intercept : Boolean. Whether or not to include an intercept variable.
            This variable is not penalised. Note that if intercept is True,
            then e will be centred.

    Returns
    -------
    X : The generated X matrix.

    y : The generated y vector.

    beta : The regression vector with the correct snr.
    """
    l = float(l)
    k = float(k)
    g = float(g)

    if intercept:
        # Centre the residual so that the intercept can absorb its mean.
        e = e - np.mean(e)

    if snr is not None:
        def f(x):
            # Signed difference between the snr achieved with beta scaled
            # by x and the requested snr.
            X, y = _generate(l, k, g, x * beta, M, e, A, intercept)

            return (np.linalg.norm(np.dot(X, x * beta))
                    / np.linalg.norm(e)) - snr

        snr = bisection_method(f, low=0.0, high=np.sqrt(snr), maxiter=30)

        beta = beta * snr

    X, y = _generate(l, k, g, beta, M, e, A, intercept)

    return X, y, beta