def stable_sd(x, n_sd=3., min_length=20):
    """Return an outlier-resistant standard deviation of x.

    Samples with fewer than min_length values are not trimmed: a single
    value yields 0. and anything else is handed to standardDeviation as is.

    Longer samples are trimmed iteratively (at most 10 passes). In each pass
    the deviation is measured around the median of the currently retained
    values; every value of the full sample lying more than n_sd deviations
    from that median is flagged, and the next pass uses only the unflagged
    values. Trimming stops early when nothing is flagged or when the flags
    are the same as in the previous pass. The deviation of the last pass is
    returned.
    """
    n_values = len(x)
    if n_values < min_length:
        if n_values == 1:
            return 0.
        return standardDeviation(x)

    x = Numeric.array(x)
    kept = x
    previous_flags = 0.
    for _pass in range(10):
        centre = median(kept)
        sd = standardDeviation(kept, centre)
        # flags always refer to the full sample, not only to the kept values
        flags = Numeric.greater(abs(x - centre), n_sd * sd)
        if not Numeric.sum(flags):
            break
        if Numeric.sum(flags == previous_flags) == len(x):
            break
        kept = Numeric.compress(Numeric.logical_not(flags), x)
        previous_flags = flags
    return sd
def mad(m, axis=0):
    """Returns Median Absolute Deviation of the given array along the given axis.

    The median along `axis` is subtracted from the data and MLab.median is
    applied to the absolute differences.
    """
    data = Numeric.asarray(m)
    centre = Numeric.asarray(median(data, axis), Numeric.Float)
    n_dims = Numeric.rank(data)
    # Move `axis` to the front and keep the remaining axes in their order.
    # Do not use swapaxes: (0,1,2) -swap-> (2,1,0); (0,1,2) -transpose-> (2,0,1)
    axis_order = [axis] + list(range(axis)) + list(range(axis + 1, n_dims))
    deviations = Numeric.absolute(Numeric.transpose(data, axis_order) - centre)
    return MLab.median(deviations)
def lowess2(x, y, xest, f=2./3., iter=3):
    """Returns estimated values of y in data points xest (or None if estimation fails).

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y at the positions xest.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter
    (converted with int()). The function will run faster with a smaller number
    of iterations.

    If the median absolute residual becomes zero, the robustness weights
    cannot be rescaled any further and the iterations stop with the current
    estimate. If a local regression is singular, a warning is printed and
    None is returned.
    """
    x = Numeric.asarray(x, 'd')
    y = Numeric.asarray(y, 'd')
    xest = Numeric.asarray(xest, 'd')
    n = len(x)
    nest = len(xest)
    # radius: num. of points to take into each local regression
    r = min(int(Numeric.ceil(f*n)), n-1)
    # h[i]: distance of the r-th nearest data point from x[i]
    h = [Numeric.sort(abs(x-x[i]))[r] for i in range(n)]
    # tricube weights; column i holds the weights for the fit at x[i]
    w = Numeric.clip(abs(([x]-Numeric.transpose([x]))/h), 0.0, 1.0)
    w = 1-w*w*w
    w = w*w*w
    # hest[i]: r-th min. distance from xest[i] to x
    hest = [Numeric.sort(abs(x-xest[i]))[r] for i in range(nest)]
    # shape: (len(x), len(xest)); column i holds the weights for xest[i]
    west = Numeric.clip(abs(([xest]-Numeric.transpose([x]))/hest), 0.0, 1.0)
    west = 1-west*west*west
    west = west*west*west
    yest = Numeric.zeros(n, 'd')
    yest2 = Numeric.zeros(nest, 'd')
    # robustness weights, updated after every iteration
    delta = Numeric.ones(n, 'd')

    def _local_fit(weights, x0):
        # Weighted least-squares line through (x, y), evaluated at x0.
        swx = sum(weights*x)
        b = Numeric.array([sum(weights*y), sum(weights*y*x)])
        A = Numeric.array([[sum(weights), swx],
                           [swx, sum(weights*x*x)]])
        beta = LinearAlgebra.solve_linear_equations(A, b)
        return beta[0] + beta[1]*x0

    try:
        for iteration in range(int(iter)):
            # fit xest
            for i in range(nest):
                yest2[i] = _local_fit(delta * west[:, i], xest[i])
            # fit x (to calculate residuals and delta)
            for i in range(n):
                yest[i] = _local_fit(delta * w[:, i], x[i])
            residuals = y-yest
            s = MLab.median(abs(residuals))
            if s == 0.0:
                # At least half of the points are fitted exactly: dividing by
                # 6*s would be a division by zero, and there is nothing left
                # to robustify, so keep the current estimate.
                break
            # bisquare robustness weights from residuals scaled by 6*s
            delta = Numeric.clip(residuals/(6*s), -1, 1)
            delta = 1-delta*delta
            delta = delta*delta
    except LinearAlgebra.LinAlgError:
        print("Warning: NumExtn.lowess2: LinearAlgebra.solve_linear_equations: Singular matrix")
        yest2 = None
    return yest2
def compile_statistics(atoms, exclude=()):
    """Summarise the shifts stored for every key of atoms.

    atoms maps a key to a pair (shifts, exposure); only the shifts are used.
    Returns a dictionary mapping each key to the tuple
    (median(shifts), stable_sd(shifts), shifts).

    The exclude argument is accepted but not used by this function.
    """
    statistics = {}
    for atom_key, (shifts, _exposure) in atoms.items():
        centre = median(shifts)
        spread = stable_sd(shifts)
        statistics[atom_key] = (centre, spread, shifts)
    return statistics
def normalise_global(ref):
    """Subtract the median of all known first elements from every entry of ref.

    ref maps keys to tuples whose first element is a number or None. The
    median of the non-None first elements is subtracted from each of them;
    entries whose first element is None are copied unchanged.

    Returns the pair (new dictionary, subtracted median). If the median
    cannot be computed, the collected values and ref are printed and the
    exception is re-raised.
    """
    known = []
    for entry in ref.values():
        if entry[0] is not None:
            known.append(entry[0])
    try:
        ref0 = median(known)
    except:
        # dump the inputs for debugging, then let the error propagate
        print(known)
        print(ref)
        raise
    shifted = {}
    for key, entry in ref.items():
        if entry[0] is None:
            shifted[key] = entry
        else:
            shifted[key] = (entry[0] - ref0,) + entry[1:]
    return shifted, ref0
n_one = 0 while n_one < 21: one_sigma_mags = Numeric.compress( Numeric.less_equal(abs(em[i, :] - 0.7526), dm), m[i, :]) n_one = len(one_sigma_mags) dm += 0.01 if dm > 0.03 and dm < 1.1: message = "Warning: not enough 1-sigma objects in the catalog. Using dm=+-%.2f" % dm self.logfile.write(message) if dm >= 1.1: message = "Warning: Stopped searching for 1-sigma objects at dm=+-%.2f" % dm self.logfile.write(message) break try: limMag = MLab.median(Numeric.sort(one_sigma_mags)[-20:]) self.logfile.write("Limiting Mag " + basefits + ":" + str(limMag)) print "Limiting Mag " + basefits + ":" + str(limMag) except Exception, err: print "Exception raised: could not determine limiting magnitude." print err #---------------------- End Limiting Magnitude Section ----------------------# #self.outColumns.append(imfilter +'_MAG_ISO') #self.outColumns.append(imfilter +'_MAGERR_ISO') self.outColumns.append(imfilter + '_MAGCORR_ISO') self.outColumns.append(imfilter + '_MAGERRCORR_ISO') self.outColumns.append(imfilter + '_APER_CORR') self.outColumns.append(imfilter + '_MAG_BPZ')
def lowessW(x, y, xest, f=2./3., iter=3, dWeights=None, callback=None):
    """Returns estimated values of y in data points xest (or None if estimation fails).

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y at the positions xest.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter
    (converted with int()). The function will run faster with a smaller number
    of iterations.

    Data points may be assigned weights (dWeights, one per data point);
    if None, all weights equal 1.

    callback, if given, is called without arguments once per iteration.

    Raises AttributeError if len(y) or len(dWeights) differs from len(x).
    If the median absolute residual becomes zero, the robustness weights
    cannot be rescaled any further: the current estimate is kept and the
    remaining iterations only invoke the callback. If a local regression is
    singular, a warning is printed and None is returned.
    """
    x = Numeric.asarray(x, 'd')
    y = Numeric.asarray(y, 'd')
    xest = Numeric.asarray(xest, 'd')
    n = len(x)
    if n != len(y):
        raise AttributeError("Error: lowessW(x,y,xest,f,iter,dWeights): len(x)=%i not equal to len(y)=%i" % (len(x), len(y)))
    nest = len(xest)
    # weights of data points (optional); identity test, because comparing an
    # array with None by value is not a plain boolean
    if dWeights is not None:
        dWeights = Numeric.asarray(dWeights, 'd')
        if len(dWeights) != n:
            raise AttributeError("Error: lowessW(x,y,xest,f,iter,dWeights): len(dWeights)=%i not equal to len(x)=%i" % (len(dWeights), len(x)))
    else:
        dWeights = Numeric.ones((n,))
    # radius: num. of points to take into each local regression
    r = min(int(Numeric.ceil(f*n)), n-1)
    # h[i]: distance of the r-th nearest data point from x[i]
    h = [Numeric.sort(abs(x-x[i]))[r] for i in range(n)]
    # tricube weights; column i holds the weights for the fit at x[i]
    w = Numeric.clip(abs(([x]-Numeric.transpose([x]))/h), 0.0, 1.0)
    w = 1-w*w*w
    w = w*w*w
    # hest[i]: r-th min. distance from xest[i] to x
    hest = [Numeric.sort(abs(x-xest[i]))[r] for i in range(nest)]
    # shape: (len(x), len(xest)); column i holds the weights for xest[i]
    west = Numeric.clip(abs(([xest]-Numeric.transpose([x]))/hest), 0.0, 1.0)
    west = 1-west*west*west
    west = west*west*west
    yest = Numeric.zeros(n, 'd')
    yest2 = Numeric.zeros(nest, 'd')
    # robustness weights, updated after every iteration
    delta = Numeric.ones(n, 'd')

    def _local_fit(weights, x0):
        # Weighted least-squares line through (x, y), evaluated at x0.
        swx = sum(weights*x)
        b = Numeric.array([sum(weights*y), sum(weights*y*x)])
        A = Numeric.array([[sum(weights), swx],
                           [swx, sum(weights*x*x)]])
        beta = LinearAlgebra.solve_linear_equations(A, b)
        return beta[0] + beta[1]*x0

    converged = False
    try:
        for iteration in range(int(iter)):
            if not converged:
                # fit xest
                for i in range(nest):
                    yest2[i] = _local_fit(delta * west[:, i] * dWeights, xest[i])
                # fit x (to calculate residuals and delta)
                for i in range(n):
                    yest[i] = _local_fit(delta * w[:, i] * dWeights, x[i])
                residuals = y-yest
                s = MLab.median(abs(residuals))
                if s == 0.0:
                    # At least half of the points are fitted exactly: dividing
                    # by 6*s would be a division by zero, and repeating the
                    # fit with unchanged weights would give the same result.
                    converged = True
                else:
                    # bisquare robustness weights from residuals scaled by 6*s
                    delta = Numeric.clip(residuals/(6*s), -1, 1)
                    delta = 1-delta*delta
                    delta = delta*delta
            # keep notifying once per requested iteration, even after convergence
            if callback:
                callback()
    except LinearAlgebra.LinAlgError:
        print("Warning: NumExtn.lowessW: LinearAlgebra.solve_linear_equations: Singular matrix")
        yest2 = None
    return yest2
def estimate_reference_single(entry, stats, bounds, ref=0.0, verbose=False,
                              exclude=None, entry_name=None, atom_type='H',
                              exclude_outliers=False, molType='protein'):
    """Estimate the reference offset of one entry from per-class statistics.

    The shifts of entry are grouped by decompose_classes(entry, bounds,
    atom_type, molType=molType). For every class that has statistics
    (mu, sd = stats[key][:2]) the difference between the median shift and mu
    is accumulated with weight n/sd**2, where n is the number of shifts used.

    Parameters:
      stats            -- mapping key -> sequence starting with (mean, sd)
      ref              -- offset used in the returned score term
      verbose          -- report classes that are skipped for lack of
                          usable statistics
      exclude          -- optional container of (entry_name, key) pairs to
                          leave out; requires entry_name
      exclude_outliers -- False, or a Z-score threshold; shifts with
                          abs(shift-mu)/sd not below it are dropped

    Returns (ref_mu, ref_sd, score): the weighted mean offset, 1/sqrt of the
    total weight, and the accumulated score S divided by N (N starts at 1
    and grows by the number of shifts used). ref_mu and ref_sd are None when
    no weight was accumulated.

    Raises TypeError if exclude is given without entry_name.
    """
    A = 0.
    B = 0.
    S = 0.
    N = 1
    ## loop through all atom types
    classes = decompose_classes(entry, bounds, atom_type, molType=molType)
    if exclude and not entry_name:
        raise TypeError('attribute entry_name needs to be set.')
    n_excluded = 0
    n_total = 0
    for key, shifts in classes.items():
        if key not in stats:
            if verbose:
                print('%s no statistics.' % (key,))
            continue
        if exclude and (entry_name, key) in exclude:
            print('%s %s excluded from ref estimation.' % (entry_name, key))
            continue
        ## get statistics for current atom type
        mu, sd = stats[key][:2]
        if sd == 0:
            # A zero spread (e.g. statistics compiled from a single value)
            # would make the weight 1/sd**2 a division by zero; such a class
            # carries no usable statistics.
            if verbose:
                print('%s zero standard deviation, skipped.' % (key,))
            continue
        k = 1./sd**2
        if exclude_outliers is not False:
            ## calculate Z scores and exclude shifts with high Z scores from analysis
            Z = abs(shifts-mu)/sd
            mask_include = Numeric.less(Z, exclude_outliers)
            shifts = Numeric.compress(mask_include, shifts)
            n_excluded += len(Z)-Numeric.sum(mask_include)
            n_total += len(Z)
        n = len(shifts)
        if not n:
            continue
        A += k*n*(median(shifts)-mu)
        B += k*n
        S += -0.5*n*Numeric.log(k)+0.5*k*sum((Numeric.array(shifts)-mu-ref)**2)
        N += n
    if B > 0.:
        ref_mu = A/B
        ref_sd = 1./Numeric.sqrt(B)
    else:
        ref_mu = None
        ref_sd = None
    # NOTE(review): this reports only when every examined shift was excluded
    # (or none was examined) -- confirm that this condition is intended.
    if exclude_outliers is not False and n_excluded == n_total:
        print('%d/%d outliers discarded' % (n_excluded, n_total))
    return ref_mu, ref_sd, S/N