def barycenter_density(data, grid, upper, lower, vmin, dens = 0e0):
    ''' Recursively search for the initial seed on the densest regions of the sample.

        Uses numpy.histogramdd to divide multidimensional samples on regions:
        the box [lower, upper] is split into `grid` bins per variable, the
        fullest cell is selected and, while its density keeps growing and its
        spread stays above `vmin`, the search is repeated inside that cell.

        Parameters:
          data   -- 2-D array, one row per element and one column per variable.
          grid   -- number of divisions applied to every variable.
          upper  -- upper bound of the search box, one value per variable.
          lower  -- lower bound of the search box, one value per variable.
          vmin   -- square array whose diagonal is the minimum spread allowed
                    (compared against mad() of the selected rows).
          dens   -- density reached by the previous recursion level.

        Returns:
          list with the non-None results of boolist() for the densest cell of
          the level where the recursion stops (presumably the row indexes of
          the elements inside that cell -- it is used to index `data`).
    '''
    n_vars = data.shape[1]
    axes = range(n_vars)

    # Number of divisions on each variable. A real list is required:
    # a lazy map() object is not a valid `bins` argument on Python 3.
    nbin = [int(grid)] * n_vars

    # Define the number of points (hist) and the limits (edges) on each region.
    hist, edges = histogramdd(data, bins=nbin, range=tuple(zip(lower, upper)))

    # Transform the limits into a paired list: (low, high) for every bin.
    limits = array([list(pairwise(edges[i])) for i in axes])

    # Find the indexes of the region that contains the max number of points.
    ind = unravel_index(argmax(hist), hist.shape)

    # Define the zone containing the maximum number of points.
    zone = array([limits[i, j] for i, j in zip(axes, ind)])

    # Calculate density:
    density = amax(hist) / volume(zone)

    # Elements inside the zone: boolist() is expected to answer None for the
    # rows that fall outside, so only the non-None answers are kept.
    answers = (boolist(i, row, zone) for i, row in enumerate(data))
    inside = [hit for hit in answers if hit is not None]

    # If density keeps growing and vmin is not reached, the function works
    # recursively. It stops when any of those conditions is no longer satisfied.
    if density > dens and aall(mad(data[inside]) > diagonal(vmin)):
        # zone is (variable, [low, high]); transposed it yields the new bounds.
        zone = zone.T
        return barycenter_density(data, grid, zone[1], zone[0], vmin, density)

    return inside
def G(N, f, X, ct, iS):
    ''' G parameter --> Transforms a X2 estimator to a Gaussian estimator.

        Parameters:
          N   -- only used through the product N*f, which selects the
                 approximation (presumably the number of elements).
          f   -- degrees of freedom of the X2 estimator.
          X   -- vector under test.
          ct  -- centre subtracted from X.
          iS  -- matrix applied to the centred vector (presumably the inverse
                 covariance, since z2 is a Mahalanobis-type distance).

        Returns:
          N*f > 100        : sqrt(2*z2) - sqrt(2*f - 1)
          30 <= N*f <= 100 : Wilson-Hilferty cube-root transformation
          otherwise        : 9e9, a value large enough to fail any test.
                             This also covers N*f being NaN or not uniformly
                             in one regime, instead of returning None.
    '''
    # Mahalanobis distance estimator:
    X = X - ct
    z2 = asum(fabs(X * ravel(dot(iS, X))))

    Nf = N * f

    # G transformation:
    if aall(Nf > 100e0):
        return sqrt(2e0*z2) - sqrt(2e0*f - 1e0)

    if aall(Nf >= 30e0) and aall(Nf <= 100e0):
        # Wilson-Hilferty: (z2/f)**(1/3) is approximately normal with
        # mean 1 - 2/(9f) and variance 2/(9f).
        spread = 2e0 / (9e0*f)
        return ((z2/f)**(1e0/3) - (1e0 - spread)) / sqrt(spread)

    # Too few degrees of freedom (or undecidable regime): reject.
    return 9e9
def pearson_r2(X):
    ''' Squared Pearson correlation coefficients between the columns of X.

        Parameters:
          X -- 2-D array, one row per element and one column per variable.

        Returns:
          numpy matrix with r**2 for every pair of variables. Entries that
          come out as NaN are set to 1; if every entry is NaN the identity
          matrix is returned instead.
    '''
    # corrcoef expects one variable per row, hence the transpose. X.T is used
    # rather than zip(*X), which is a lazy iterator on Python 3.
    r2 = corrcoef(X.T)**2

    undefined = isnan(r2)
    if aall(undefined):
        r2 = eye(X.shape[1])
    else:
        r2[undefined] = 1e0

    return matrix(r2)
def Robust_r2(X, ct, dev):
    ''' Shevlyakov 1997 - On a Robust Estimation of Correlation Coeficient.

        Median-based squared correlation between the columns of X after
        standardising them with `ct` and `dev`:

            r = (med|x + y|**2 - med|x - y|**2) / (med|x + y|**2 + med|x - y|**2)

        Parameters:
          X   -- 2-D array, one row per element and one column per variable.
          ct  -- centre subtracted from X.
          dev -- deviation dividing the centred X.

        Returns:
          numpy matrix with r**2 for every pair of variables. Entries that
          come out as NaN are set to 1; if every entry is NaN the identity
          matrix is returned instead.
    '''
    # TINY keeps the division finite when a deviation is zero.
    X = (X - ct)/(dev + TINY)
    Xt = X.T

    rows = []
    # Warnings are silenced only while the ratios are computed, so the
    # process-wide warning filters are left as the caller configured them.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for i in range(X.shape[1]):
            column = X[:, i]
            # Each row of Xt is one variable; the median runs over the elements.
            u = median(fabs(Xt + column), axis=1)**2
            v = median(fabs(Xt - column), axis=1)**2
            rows.append(((u - v)/(u + v))**2)

    r2 = matrix(rows)

    undefined = isnan(r2)
    if aall(undefined):
        return matrix(eye(X.shape[1]))

    r2[undefined] = 1e0
    return r2
def hyp_test(N, q1, f, key, x, ct, iS):
    ''' G hypothesis testing.

        Evaluates G(N, f, x, ct, iS) and compares it with the threshold q1.

        Returns:
          key   when every G value is less than or equal to q1,
          None  otherwise.
    '''
    g_value = G(N, f, x, ct, iS)

    if not aall(g_value <= q1):
        return None

    return key