Beispiel #1
0
def MA_zscore(G, R, window=1./5., padded=False, progressCallback=None):
    """ Return the Z-score of log2 fold ratio estimated from local
    distribution of log2 fold ratio values on the MA-plot.

    The ratio and intensity values come from ``ratio_intensity(G, R)``.
    Values are ordered by intensity and, for each one, the standard
    deviation of the ratios inside a window of its neighbours in that
    ordering is computed.  The score is the ratio divided by that local
    standard deviation (the local mean is not subtracted).

    :param G: first argument of ``ratio_intensity``; ``G.shape`` also sets
        the shape of the result (presumably one channel of a two-channel
        array -- confirm against ``ratio_intensity``).  The indexing below
        assumes one-dimensional data.
    :param R: second argument of ``ratio_intensity``.
    :param window: window size as a fraction of the number of values.
    :param padded: if true, windows that run past either end of the
        ordering are topped up with the elements nearest to that end, so
        every window holds the same number of elements.
    :param progressCallback: optional callable receiving the progress in
        percent.
    :return: masked array of scores; entries that are not finite (for
        instance when the local standard deviation is zero) are masked.
    """
    ratio, intensity = ratio_intensity(G, R)

    z_scores = numpy.ma.zeros(G.shape)
    order = list(numpy.ma.argsort(intensity))
    count = len(order)
    r = int(numpy.ceil(count * window))  # number of window elements
    half = r // 2  # integer division keeps the slice bounds integral

    def local_indices(i):
        """ Indices of the window centred on position i of the ordering
        (edge elements are repeated if `padded` and out of bounds).
        """
        start, end = i - half, i + half + r % 2
        pad_start, pad_end = [], []
        if start < 0:
            if padded:
                # repeat as many leading elements as fall off the start
                pad_start = order[:-start]
            start = 0
        if end > count:
            if padded:
                # repeat as many trailing elements as fall off the end
                overshoot = end - count
                pad_end = order[max(count - overshoot, 0):]
            end = count
        # The order inside a window is irrelevant for the standard
        # deviation, so the padding is not shuffled.
        return pad_start + order[start:end] + pad_end

    # Milestones are only consulted when there is a callback to notify.
    milestones = orngMisc.progressBarMilestones(count) if progressCallback else ()
    for i in range(count):
        local_ratio = numpy.take(ratio, local_indices(i))
        local_std = numpy.ma.std(local_ratio)
        ind = order[i]
        z_scores[ind] = ratio[ind] / local_std
        if progressCallback and i in milestones:
            progressCallback(100. * i / count)

    # Mask non-finite scores while keeping entries that are already masked
    # (e.g. because the ratio itself was masked).
    invalid = ~numpy.isfinite(numpy.ma.getdata(z_scores))
    z_scores.mask = numpy.ma.getmaskarray(z_scores) | invalid
    return z_scores
Beispiel #2
0
def _tricube_weights(x, centers, r):
    """Return the tricube kernel weights of the points x around each center.

    The result has shape (len(x), len(centers)).  Column i holds
    (1 - min(|x - centers[i]| / h_i, 1)**3)**3, where the bandwidth h_i is
    the distance from centers[i] to its r-th nearest point of x (0-based).
    A bandwidth of zero (more than r points coinciding with a center)
    leads to NaN weights.
    """
    dist = x[numpy.newaxis, :] - centers[:, numpy.newaxis]
    numpy.abs(dist, dist)
    dist.sort(axis=1)
    # Copy the column: a plain slice is a view that would keep the whole
    # distance matrix alive after the `del` below.
    h = dist[:, r].copy()
    del dist  # to free memory

    w = centers[numpy.newaxis, :] - x[:, numpy.newaxis]
    w /= h  # column i is scaled by the bandwidth of center i
    numpy.abs(w, w)
    numpy.clip(w, 0.0, 1.0, w)
    # (1 - w**3)**3, computed in place
    w **= 3
    w *= -1
    w += 1
    w **= 3
    return w


def _weighted_line_at(x, y, weights, x0):
    """Fit a weighted least-squares line through (x, y) and evaluate it at x0."""
    wx = weights * x
    wy = weights * y
    sum_w = numpy.sum(weights)
    sum_wx = numpy.sum(wx)
    b = numpy.array([numpy.sum(wy), numpy.sum(wy * x)])
    A = numpy.array([[sum_w, sum_wx], [sum_wx, numpy.sum(wx * x)]])
    beta = numpy.linalg.solve(A, b)
    return beta[0] + beta[1] * x0


def lowess2(x, y, xest, f=2./3., iter=3, progressCallback=None):
    """Returns estimated values of y in data points xest.

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y at the positions given in xest.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.

    The inputs are converted to single precision arrays and the result is a
    single precision array with one value per element of xest.  With iter
    smaller than 1 no fit is performed and zeros are returned.  A singular
    local regression makes numpy.linalg.solve raise numpy.linalg.LinAlgError;
    the error is not caught here.

    progressCallback, if given, is called with the progress in percent.

    Taken from Peter Juvan's numpyExtn.py, modified for numpy, computes pairwise
    distances inplace
    """
    x = numpy.asarray(x, 'f')
    y = numpy.asarray(y, 'f')
    xest = numpy.asarray(xest, 'f')
    n = len(x)
    nest = len(xest)
    yest2 = numpy.zeros(nest, 'f')
    if iter < 1:
        return yest2
    r = min(int(numpy.ceil(f*n)), n-1)  # radius: num. of points to take into LR

    # Only the final fit at xest is returned, and it depends on the
    # robustness weights produced by the iter - 1 preceding fits at x.
    # Fitting xest in every iteration and fitting x once more at the end
    # would give the same result for roughly twice the work.
    robust_passes = iter - 1
    iter_count = robust_passes * n + nest
    # Milestones are only consulted when there is a callback to notify.
    milestones = orngMisc.progressBarMilestones(iter_count) if progressCallback else ()
    curr_iter = 0

    delta = numpy.ones(n, 'f')
    if robust_passes:
        # fit x (to calculate residuals and delta)
        w = _tricube_weights(x, x, r)
        yest = numpy.zeros(n, 'f')
        for _ in range(robust_passes):
            for i in range(n):
                yest[i] = _weighted_line_at(x, y, delta * w[:, i], x[i])
                if progressCallback and curr_iter in milestones:
                    progressCallback(100. * curr_iter / iter_count)
                curr_iter += 1
            residuals = y - yest
            s = numpy.median(numpy.abs(residuals))
            if s == 0:
                # At least half of the points are fitted exactly; dividing
                # by s would turn every weight into NaN, so keep the
                # current weights and stop robustifying.
                break
            delta = numpy.clip(residuals/(6*s), -1, 1)
            delta = 1 - delta*delta
            delta = delta*delta
        del w  # to free memory
        curr_iter = robust_passes * n

    # fit xest
    west = _tricube_weights(x, xest, r)
    for i in range(nest):
        yest2[i] = _weighted_line_at(x, y, delta * west[:, i], xest[i])
        if progressCallback and curr_iter in milestones:
            progressCallback(100. * curr_iter / iter_count)
        curr_iter += 1
    return yest2
Beispiel #3
0
def lowess(x, y, f=2./3., iter=3, progressCallback=None):
    """ Lowess taken from Bio.Statistics.lowess, modified to compute pairwise 
    distances inplace.
     
    lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.

    x and y should be numpy arrays of equal length.  The return value is
    a numpy float array of that length.

    progressCallback, if given, is called with the progress in percent.

    If the median absolute residual becomes zero (at least half of the points
    are fitted exactly) the remaining robustifying iterations are skipped,
    because the robustness weights would otherwise all become NaN.

    e.g.
    >>> import numpy
    >>> x = numpy.array([4,  4,  7,  7,  8,  9, 10, 10, 10, 11, 11, 12, 12, 12,
    ...                 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16,
    ...                 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20,
    ...                 20, 22, 23, 24, 24, 24, 24, 25], float)
    >>> y = numpy.array([2, 10,  4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24,
    ...                 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40,
    ...                 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56,
    ...                 64, 66, 54, 70, 92, 93, 120, 85], float)
    >>> result = lowess(x, y)
    >>> len(result)
    50
    >>> print("[%0.2f, ..., %0.2f]" % (result[0], result[-1]))
    [4.85, ..., 84.98]
    """
    n = len(x)
    r = min(int(numpy.ceil(f*n)), n - 1)

    # The distance matrices are modified in place, which requires a
    # floating point dtype; integer input is converted once up front.
    xs = numpy.asarray(x)
    if xs.dtype.kind != 'f':
        xs = xs.astype(float)

    # Bandwidth of point i: distance to its r-th nearest neighbour.
    dist = xs[numpy.newaxis, :] - xs[:, numpy.newaxis]
    numpy.abs(dist, dist)
    dist.sort(axis=1)
    # Copy the column: a plain slice is a view that would keep the whole
    # distance matrix alive after the `del` below.
    h = dist[:, r].copy()
    del dist

    # Tricube kernel weights (1 - |d/h|**3)**3, computed in place.
    w = xs[numpy.newaxis, :] - xs[:, numpy.newaxis]
    w /= h
    numpy.abs(w, w)
    numpy.clip(w, 0.0, 1.0, w)
    w **= 3
    w *= -1
    w += 1
    w **= 3

    yest = numpy.zeros(n)
    delta = numpy.ones(n)
    total = iter * n
    # Milestones are only consulted when there is a callback to notify.
    milestones = orngMisc.progressBarMilestones(total) if progressCallback else ()
    for iteration in range(iter):
        for i in range(n):
            # Closed-form solution of the 2x2 weighted least-squares system.
            weights = delta * w[:, i]
            weights_mul_x = weights * x
            b1 = numpy.ma.dot(weights, y)
            b2 = numpy.ma.dot(weights_mul_x, y)
            A11 = weights.sum()
            A12 = weights_mul_x.sum()
            A22 = numpy.ma.dot(weights_mul_x, x)
            determinant = A11*A22 - A12*A12
            beta1 = (A22*b1 - A12*b2) / determinant
            beta2 = (A11*b2 - A12*b1) / determinant
            yest[i] = beta1 + beta2*x[i]
            step = iteration*n + i
            if progressCallback and step in milestones:
                progressCallback(100. * step / total)
        residuals = y - yest
        s = numpy.ma.median(numpy.abs(residuals))
        if s == 0:
            # Dividing by s would turn every robustness weight into NaN.
            break
        delta[:] = numpy.clip(residuals/(6*s), -1, 1)
        delta[:] = 1 - delta*delta
        delta[:] = delta*delta
    return yest