コード例 #1
1
ファイル: lowess.py プロジェクト: joshainglis/biopython
def lowess(x, y, f=2. / 3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.

    x and y should be array-likes of equal length; they are converted to
    numpy float arrays.  The return value is a numpy float array of that
    length.

    If the median absolute residual becomes exactly zero (the fit already
    passes through at least half of the points), the robustifying iterations
    stop early, because the residual scale can no longer be used to reweight.

    e.g.
    >>> import numpy
    >>> x = numpy.array([4,  4,  7,  7,  8,  9, 10, 10, 10, 11, 11, 12, 12, 12,
    ...                 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16,
    ...                 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20,
    ...                 20, 22, 23, 24, 24, 24, 24, 25], float)
    >>> y = numpy.array([2, 10,  4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24,
    ...                 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40,
    ...                 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56,
    ...                 64, 66, 54, 70, 92, 93, 120, 85], float)
    >>> result = lowess(x, y)
    >>> len(result)
    50
    >>> print("[%0.2f, ..., %0.2f]" % (result[0], result[-1]))
    [4.85, ..., 84.98]
    """
    x = numpy.asarray(x, dtype=float)
    y = numpy.asarray(y, dtype=float)
    n = len(x)
    if n == 0:
        return numpy.zeros(0)
    # Rank of the neighbour whose distance defines the local bandwidth.
    # Clamp it to the last valid index so that f close to 1 does not index
    # past the end of the sorted distances.
    r = min(int(numpy.ceil(f * n)), n - 1)
    h = numpy.array([numpy.sort(numpy.abs(x - x[i]))[r] for i in range(n)])
    # w[j, i] is the tricube weight of point j in the local fit around x[i].
    w = numpy.abs(x[numpy.newaxis, :] - x[:, numpy.newaxis]) / h
    w = numpy.clip(w, 0.0, 1.0)
    w = 1 - w * w * w
    w = w * w * w
    yest = numpy.zeros(n)
    delta = numpy.ones(n)
    for iteration in range(iter):
        for i in range(n):
            # Weighted least-squares line through the neighbourhood of x[i],
            # solved in closed form from the 2x2 normal equations.
            weights = delta * w[:, i]
            weights_mul_x = weights * x
            b1 = numpy.dot(weights, y)
            b2 = numpy.dot(weights_mul_x, y)
            A11 = weights.sum()
            A12 = weights_mul_x.sum()
            A22 = numpy.dot(weights_mul_x, x)
            determinant = A11 * A22 - A12 * A12
            beta1 = (A22 * b1 - A12 * b2) / determinant
            beta2 = (A11 * b2 - A12 * b1) / determinant
            yest[i] = beta1 + beta2 * x[i]
        residuals = y - yest
        s = numpy.median(numpy.abs(residuals))
        if s == 0:
            # Dividing by a zero scale would turn every estimate into NaN on
            # the next pass; the current fit is kept instead.
            break
        # Bisquare robustness weights: points with large residuals are
        # down-weighted in the next iteration.
        delta = numpy.clip(residuals / (6 * s), -1, 1)
        delta = 1 - delta * delta
        delta = delta * delta
    return yest
コード例 #2
0
def lowess(x, y, f=2./3., iter=3):
  """lowess(x, y, f=2./3., iter=3) -> yest

Lowess smoother: Robust locally weighted regression.
The lowess function fits a nonparametric regression curve to a scatterplot.
The arrays x and y contain an equal number of elements; each pair
(x[i], y[i]) defines a data point in the scatterplot. The function returns
the estimated (smooth) values of y.

The smoothing span is given by f. A larger value for f will result in a
smoother curve. The number of robustifying iterations is given by iter. The
function will run faster with a smaller number of iterations.

If the median absolute residual becomes exactly zero (the fit already passes
through at least half of the points), the robustifying iterations stop early,
because the residual scale can no longer be used to reweight."""
  n = len(x)
  # Rank of the neighbour whose distance defines the local bandwidth; clamp
  # it to the last valid index so that f close to 1 does not index past the
  # end of the sorted distances.
  r = int(ceil(f*n))
  if r > n-1:
    r = n-1
  h = [sort(abs(x-x[i]))[r] for i in range(n)]
  # w[j,i] is the tricube weight of point j in the local fit around x[i].
  w = clip(abs(([x]-transpose([x]))/h),0.0,1.0)
  w = 1-w*w*w
  w = w*w*w
  yest = zeros(n,'d')
  delta = ones(n,'d')
  for iteration in range(iter):
    for i in range(n):
      # Weighted least-squares line through the neighbourhood of x[i].
      weights = delta * w[:,i]
      weighted_x = weights*x
      b = array([sum(weights*y), sum(weighted_x*y)])
      A = array([[sum(weights),    sum(weighted_x)],
                 [sum(weighted_x), sum(weighted_x*x)]])
      beta = solve_linear_equations(A,b)
      yest[i] = beta[0] + beta[1]*x[i]
    residuals = y-yest
    s = median(abs(residuals))
    if s == 0:
      # Dividing by a zero scale would turn every estimate into NaN on the
      # next pass; the current fit is kept instead.
      break
    # Bisquare robustness weights: points with large residuals are
    # down-weighted in the next iteration.
    delta = clip(residuals/(6*s),-1,1)
    delta = 1-delta*delta
    delta = delta*delta
  return yest
コード例 #3
0
def lowess(x, y, f=2. / 3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.

    x and y should be array-likes of equal length; they are converted to
    numpy float arrays.  The return value is a numpy float array of that
    length.

    If the median absolute residual becomes exactly zero (the fit already
    passes through at least half of the points), the robustifying iterations
    stop early, because the residual scale can no longer be used to reweight.

    e.g.
    >>> import numpy
    >>> x = numpy.array([4,  4,  7,  7,  8,  9, 10, 10, 10, 11, 11, 12, 12, 12,
    ...                 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16,
    ...                 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20,
    ...                 20, 22, 23, 24, 24, 24, 24, 25], float)
    >>> y = numpy.array([2, 10,  4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24,
    ...                 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40,
    ...                 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56,
    ...                 64, 66, 54, 70, 92, 93, 120, 85], float)
    >>> result = lowess(x, y)
    >>> len(result)
    50
    >>> print("[%0.2f, ..., %0.2f]" % (result[0], result[-1]))
    [4.85, ..., 84.98]
    """
    x = numpy.asarray(x, dtype=float)
    y = numpy.asarray(y, dtype=float)
    n = len(x)
    if n == 0:
        return numpy.zeros(0)
    # Rank of the neighbour whose distance defines the local bandwidth.
    # Clamp it to the last valid index so that f close to 1 does not index
    # past the end of the sorted distances.
    r = min(int(numpy.ceil(f * n)), n - 1)
    h = numpy.array([numpy.sort(numpy.abs(x - x[i]))[r] for i in range(n)])
    # w[j, i] is the tricube weight of point j in the local fit around x[i].
    w = numpy.abs(x[numpy.newaxis, :] - x[:, numpy.newaxis]) / h
    w = numpy.clip(w, 0.0, 1.0)
    w = 1 - w * w * w
    w = w * w * w
    yest = numpy.zeros(n)
    delta = numpy.ones(n)
    for iteration in range(iter):
        for i in range(n):
            # Weighted least-squares line through the neighbourhood of x[i],
            # solved in closed form from the 2x2 normal equations.
            weights = delta * w[:, i]
            weights_mul_x = weights * x
            b1 = numpy.dot(weights, y)
            b2 = numpy.dot(weights_mul_x, y)
            A11 = weights.sum()
            A12 = weights_mul_x.sum()
            A22 = numpy.dot(weights_mul_x, x)
            determinant = A11 * A22 - A12 * A12
            beta1 = (A22 * b1 - A12 * b2) / determinant
            beta2 = (A11 * b2 - A12 * b1) / determinant
            yest[i] = beta1 + beta2 * x[i]
        residuals = y - yest
        s = numpy.median(numpy.abs(residuals))
        if s == 0:
            # Dividing by a zero scale would turn every estimate into NaN on
            # the next pass; the current fit is kept instead.
            break
        # Bisquare robustness weights: points with large residuals are
        # down-weighted in the next iteration.
        delta = numpy.clip(residuals / (6 * s), -1, 1)
        delta = 1 - delta * delta
        delta = delta * delta
    return yest
コード例 #4
0
ファイル: test_Cluster.py プロジェクト: xulesc/biopython
    def test_median_mean(self):
        """Check mean() and median() of the selected module on fixed data.

        The implementation under test is chosen by TestCluster.module; each
        data set is checked against its expected mean and median to three
        decimal places.
        """
        backend = TestCluster.module
        if backend == 'Bio.Cluster':
            from Bio.Cluster import mean, median
        elif backend == 'Pycluster':
            from Pycluster import mean, median

        # (data, expected mean, expected median); the first data set is a
        # numpy array, the remaining ones are plain lists.
        cases = (
            (numpy.array([34.3, 3, 2]), 13.1, 3.0),
            ([5, 10, 15, 20], 12.5, 12.5),
            ([1, 2, 3, 5, 7, 11, 13, 17], 7.375, 6.0),
            ([100, 19, 3, 1.5, 1.4, 1, 1, 1], 15.988, 1.45),
        )
        for data, expected_mean, expected_median in cases:
            self.assertAlmostEqual(mean(data), expected_mean, places=3)
            self.assertAlmostEqual(median(data), expected_median, places=3)
コード例 #5
0
def test_mean_median(module):
    """Print the mean and median of four fixed data sets.

    module selects the implementation to exercise: 'Bio.Cluster' or
    'Pycluster'.  Any other value raises ValueError.  For each data set the
    data row is printed via print_row, followed by a line with its mean and
    median.
    """
    import sys

    if module == 'Bio.Cluster':
        from Bio.Cluster import mean, median
    elif module == 'Pycluster':
        from Pycluster import mean, median
    else:
        # String exceptions are not valid; raise a real exception instead.
        raise ValueError('Unknown module name: %s' % (module,))
    print("test_mean_median:")
    data1 = array([34.3, 3, 2])
    data2 = [5, 10, 15, 20]
    data3 = [1, 2, 3, 5, 7, 11, 13, 17]
    data4 = [100, 19, 3, 1.5, 1.4, 1, 1, 1]

    for data in [data1, data2, data3, data4]:
        # Written without a trailing newline so the row follows on the same
        # line; this form behaves the same on Python 2 and Python 3.
        sys.stdout.write("data = ")
        print_row(data)
        print("mean is %7.3f; median is %7.3f" % (mean(data), median(data)))
    print("")
コード例 #6
0
ファイル: test_Cluster.py プロジェクト: mlyne/Scripts
def test_mean_median(module):
  """Print the mean and median of four fixed data sets.

  module selects the implementation to exercise: 'Bio.Cluster' or
  'Pycluster'.  Any other value raises ValueError.  For each data set the
  data row is printed via print_row, followed by a line with its mean and
  median.
  """
  import sys

  if module=='Bio.Cluster':
    from Bio.Cluster import mean, median
  elif module=='Pycluster':
    from Pycluster import mean, median
  else:
    # String exceptions are not valid; raise a real exception instead.
    raise ValueError('Unknown module name: %s' % (module,))
  print("test_mean_median:")
  data1 = array([ 34.3, 3, 2 ])
  data2 = [ 5, 10 ,15, 20]
  data3 = [ 1, 2, 3, 5, 7, 11, 13, 17]
  data4 = [ 100, 19, 3, 1.5, 1.4, 1, 1, 1]

  for data in [data1, data2, data3, data4]:
    # Written without a trailing newline so the row follows on the same
    # line; this form behaves the same on Python 2 and Python 3.
    sys.stdout.write("data = ")
    print_row(data)
    print("mean is %7.3f; median is %7.3f" % (mean(data), median(data)))
  print("")