def lowess(x, y, f=2. / 3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a
    scatterplot. The arrays x and y contain an equal number of elements;
    each pair (x[i], y[i]) defines a data point in the scatterplot. The
    function returns the estimated (smooth) values of y.

    The smoothing span is given by f. A larger value for f will result in
    a smoother curve. The number of robustifying iterations is given by
    iter. The function will run faster with a smaller number of iterations.

    x and y are converted to numpy float arrays, so any equal-length
    sequences of numbers are accepted. The return value is a numpy float
    array of that length.

    If the median absolute residual becomes zero (at least half of the
    points are reproduced exactly), the robustifying iterations stop and
    the current estimate is returned.

    e.g.
    >>> import numpy
    >>> x = numpy.array([4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12,
    ...                  12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16,
    ...                  17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20,
    ...                  20, 22, 23, 24, 24, 24, 24, 25], float)
    >>> y = numpy.array([2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24,
    ...                  28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40,
    ...                  32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56,
    ...                  64, 66, 54, 70, 92, 93, 120, 85], float)
    >>> result = lowess(x, y)
    >>> len(result)
    50
    >>> print("[%0.2f, ..., %0.2f]" % (result[0], result[-1]))
    [4.85, ..., 84.98]
    """
    # Work on float arrays: integer input would otherwise be subject to
    # integer division under Python 2, and plain lists do not support the
    # element-wise arithmetic used below.
    x = numpy.asarray(x, float)
    y = numpy.asarray(y, float)
    n = len(x)
    # Rank of the neighbour whose distance defines the local bandwidth.
    # Clamp it to the last valid index so that f == 1 does not run past the
    # end of the sorted distances.
    r = min(int(numpy.ceil(f * n)), n - 1)
    h = [numpy.sort(abs(x - x[i]))[r] for i in range(n)]
    # Tricube kernel: column i of w holds the weight of every point in the
    # local fit around x[i].
    w = numpy.clip(abs(([x] - numpy.transpose([x])) / h), 0.0, 1.0)
    w = 1 - w * w * w
    w = w * w * w
    yest = numpy.zeros(n)
    delta = numpy.ones(n)
    for iteration in range(iter):
        for i in range(n):
            # Weighted least-squares line through the neighbourhood of
            # x[i], solved in closed form from the 2x2 normal equations.
            weights = delta * w[:, i]
            weights_mul_x = weights * x
            b1 = numpy.dot(weights, y)
            b2 = numpy.dot(weights_mul_x, y)
            A11 = weights.sum()
            A12 = weights_mul_x.sum()
            A21 = A12
            A22 = numpy.dot(weights_mul_x, x)
            determinant = A11 * A22 - A12 * A21
            beta1 = (A22 * b1 - A12 * b2) / determinant
            beta2 = (A11 * b2 - A21 * b1) / determinant
            yest[i] = beta1 + beta2 * x[i]
        residuals = y - yest
        s = median(abs(residuals))
        if s == 0:
            # Dividing by a zero scale would give 0/0 = NaN weights and
            # poison every later estimate; the fit cannot be made more
            # robust, so keep the current one.
            break
        # Bisquare robustness weights: points with large residuals are
        # down-weighted in the next pass.
        delta[:] = numpy.clip(residuals / (6 * s), -1, 1)
        delta[:] = 1 - delta * delta
        delta[:] = delta * delta
    return yest
def lowess(x, y, f=2./3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a
    scatterplot. The arrays x and y contain an equal number of elements;
    each pair (x[i], y[i]) defines a data point in the scatterplot. The
    function returns the estimated (smooth) values of y.

    The smoothing span is given by f. A larger value for f will result in
    a smoother curve. The number of robustifying iterations is given by
    iter. The function will run faster with a smaller number of iterations.

    x and y are copied into double-precision arrays before use. If the
    median absolute residual becomes zero, the robustifying iterations
    stop and the current estimate is returned."""
    # Double-precision copies: integer input would otherwise be subject to
    # integer division, and plain lists do not support the element-wise
    # arithmetic used below.
    x = array(x, 'd')
    y = array(y, 'd')
    n = len(x)
    # Rank of the neighbour whose distance defines the local bandwidth,
    # clamped so that f == 1 cannot index past the end of the distances.
    r = min(int(ceil(f*n)), n-1)
    h = [sort(abs(x-x[i]))[r] for i in range(n)]
    # Tricube kernel: column i of w holds the weight of every point in the
    # local fit around x[i].
    w = clip(abs(([x]-transpose([x]))/h), 0.0, 1.0)
    w = 1-w*w*w
    w = w*w*w
    yest = zeros(n, 'd')
    delta = ones(n, 'd')
    for iteration in range(iter):
        for i in range(n):
            # Weighted least-squares line through the neighbourhood of
            # x[i], obtained from the 2x2 normal equations A * beta = b.
            weights = delta * w[:, i]
            weights_x = weights*x
            b = array([sum(weights*y), sum(weights_x*y)])
            A = array([[sum(weights), sum(weights_x)],
                       [sum(weights_x), sum(weights_x*x)]])
            beta = solve_linear_equations(A, b)
            yest[i] = beta[0] + beta[1]*x[i]
        residuals = y-yest
        s = median(abs(residuals))
        if s == 0:
            # A zero scale would turn the robustness weights into 0/0;
            # the fit cannot be made more robust, so keep the current one.
            break
        # Bisquare robustness weights for the next pass.
        delta = clip(residuals/(6*s), -1, 1)
        delta = 1-delta*delta
        delta = delta*delta
    return yest
def test_median_mean(self):
    """Check mean() and median() of the selected module on fixed data.

    The implementation under test is chosen by TestCluster.module; every
    data set is compared against its known mean and median to three
    decimal places.
    """
    if TestCluster.module == 'Bio.Cluster':
        from Bio.Cluster import mean, median
    elif TestCluster.module == 'Pycluster':
        from Pycluster import mean, median

    # (data, expected mean, expected median)
    cases = (
        (numpy.array([34.3, 3, 2]), 13.1, 3.0),
        ([5, 10, 15, 20], 12.5, 12.5),
        ([1, 2, 3, 5, 7, 11, 13, 17], 7.375, 6.0),
        ([100, 19, 3, 1.5, 1.4, 1, 1, 1], 15.988, 1.45),
    )
    for data, expected_mean, expected_median in cases:
        self.assertAlmostEqual(mean(data), expected_mean, places=3)
        self.assertAlmostEqual(median(data), expected_median, places=3)
def test_mean_median(module):
    """Print the mean and median of four small data sets.

    module names the package that provides mean() and median(): either
    'Bio.Cluster' or 'Pycluster'. Any other value raises ValueError.
    """
    import sys

    if module == 'Bio.Cluster':
        from Bio.Cluster import mean, median
    elif module == 'Pycluster':
        from Pycluster import mean, median
    else:
        # String exceptions are rejected by Python 2.6 and later, so the
        # original "raise 'text', value" form failed with a TypeError
        # instead of reporting the offending name.
        raise ValueError('Unknown module name: %s' % module)
    print("test_mean_median:")
    data1 = array([34.3, 3, 2])
    data2 = [5, 10, 15, 20]
    data3 = [1, 2, 3, 5, 7, 11, 13, 17]
    data4 = [100, 19, 3, 1.5, 1.4, 1, 1, 1]
    for data in [data1, data2, data3, data4]:
        # Label and row share one output line; the separating space is
        # written explicitly.
        # NOTE(review): assumes print_row emits its output via print, so
        # that the old trailing-comma print produced this same space.
        sys.stdout.write("data = ")
        print_row(data)
        print("mean is %7.3f; median is %7.3f" % (mean(data), median(data)))
    # Blank line after the report.
    print("")
def test_mean_median(module):
    """Print the mean and median of four small data sets.

    module names the package that provides mean() and median(): either
    'Bio.Cluster' or 'Pycluster'. Any other value raises ValueError.
    """
    import sys

    if module == 'Bio.Cluster':
        from Bio.Cluster import mean, median
    elif module == 'Pycluster':
        from Pycluster import mean, median
    else:
        # String exceptions are rejected by Python 2.6 and later, so the
        # original "raise 'text', value" form failed with a TypeError
        # instead of reporting the offending name.
        raise ValueError('Unknown module name: %s' % module)
    print("test_mean_median:")
    data1 = array([34.3, 3, 2])
    data2 = [5, 10, 15, 20]
    data3 = [1, 2, 3, 5, 7, 11, 13, 17]
    data4 = [100, 19, 3, 1.5, 1.4, 1, 1, 1]
    for data in [data1, data2, data3, data4]:
        # Label and row share one output line; the separating space is
        # written explicitly.
        # NOTE(review): assumes print_row emits its output via print, so
        # that the old trailing-comma print produced this same space.
        sys.stdout.write("data = ")
        print_row(data)
        print("mean is %7.3f; median is %7.3f" % (mean(data), median(data)))
    # Blank line after the report.
    print("")