def running_sigma_clip(x, sig=3., nwindow=100, noverlap=0, iters=None,
                       cenfunc=np.ma.median, varfunc=np.ma.var):
    """
    Sigma-clip `x` section by section and return the result as a masked array.

    `x` is cut into sections with ``Div.div(x, nwindow, 0)``.  Within every
    section NaNs are masked first; afterwards points whose squared deviation
    from ``cenfunc(section)`` exceeds ``sig**2 * varfunc(deviations)`` are
    masked as well.

    Parameters
    ----------
    x : array-like
        Data to clip.
    sig : float
        Clipping threshold in units of the standard deviation (the comparison
        is done on squared quantities).
    nwindow : int
        Section size handed to ``Div.div``.
    noverlap : int
        Overlap between sections.  Not implemented: any truthy value only
        triggers a printed notice and is then ignored.
    iters : int or None
        Number of rejection passes per section.  With None, passes are
        repeated until the number of unmasked points stops changing.
    cenfunc : callable
        Computes the centre of a section (default: masked median).
    varfunc : callable
        Computes the variance of the deviations (default: masked variance).

    Returns
    -------
    numpy.ma.MaskedArray
        The clipped sections concatenated and truncated to ``len(x)``.
    """
    # TODO: Incorporate in WindowOutlierDetection
    # NOTE: this implementation is known to be slow.
    if noverlap:
        print( 'Overlap not implemented yet! Setting noverlap=0' )
        noverlap = 0

    def _reject_once(chunk):
        # One rejection pass: extend the mask of `chunk` in place.
        resid = chunk - cenfunc(chunk)
        exceeds = np.ma.greater(resid * resid, varfunc(resid) * sig**2)
        chunk.mask |= exceeds

    clipped = []
    for window in Div.div(x, nwindow, 0):
        # NOTE(review): open question from the original author -- what should
        # happen when `window` already is a masked array with non-NaN values
        # masked?
        chunk = np.ma.masked_where(np.isnan(window), window)

        if iters is not None:
            for _ in range(iters):
                _reject_once(chunk)
        else:
            # Always do at least one pass, then continue until a pass rejects
            # nothing new.
            while True:
                n_good = chunk.count()
                _reject_once(chunk)
                if chunk.count() == n_good:
                    break

        clipped.append(chunk)

    # Truncate to the input length (presumably Div.div may return more
    # samples than `x` holds -- TODO confirm).
    return np.ma.concatenate(clipped)[:len(x)]
def WindowOutlierDetection(data, nwindow, noverlap, method, weight_kernel=None,
                           return_index=True, return_mask=False,
                           return_masked_data=False, *args, **kwargs):
    """
    Detect outliers by applying `method` to (overlapping) windows of `data`.

    Every window produced by ``Div.div(data, nwindow, noverlap)`` is passed
    (transposed) to `method`, which returns the indices of the outliers
    relative to that window.  Each detection adds ``weights[i] / noc[j]`` to
    the score of data point ``j``, where ``weights`` is the window function
    given by `weight_kernel` and ``noc`` comes from ``Div.get_nocc``
    (presumably the number of windows covering each point -- TODO confirm).
    Points whose accumulated score exceeds 0.5 are reported as outliers.

    Parameters
    ----------
    data : numpy.ndarray or numpy.ma.MaskedArray
        Input data; windows are taken along the last axis.
    nwindow : int
        Window size.
    noverlap : int or other value accepted by ``Spectral.get_overlap``
        Overlap between consecutive windows.
    method : callable
        ``method(window.T, *args, **kwargs)`` must return a 1-D sequence of
        integer outlier indices relative to the window.
    weight_kernel : optional
        Window specification forwarded to ``get_window``; defaults to
        ``'boxcar'``.
    return_index, return_mask, return_masked_data : bool
        Select the return type.  They are checked in this order and the first
        truthy one wins, so `return_index` must be False to obtain a mask or
        masked data.
    *args, **kwargs
        Forwarded to `method`.

    Returns
    -------
    numpy.ndarray of int
        Sorted outlier indices (``return_index``).
    numpy.ndarray of bool
        Mask of ``data.shape`` with outliers set (``return_mask``).
    numpy.ma.MaskedArray
        `data` with outliers masked (``return_masked_data``).  A masked-array
        input is modified in place.
    None
        If none of the three flags is set.

    Notes
    -----
    If the data is shorter than one window a warning is issued and the
    existing mask of `data` (``np.ma.nomask`` for plain arrays) is returned,
    irrespective of the ``return_*`` flags.
    """
    noverlap = Spectral.get_overlap(nwindow, noverlap)
    N = data.shape[-1]
    if N < nwindow:
        warn( 'Data length smaller than window size! No clipping done!')
        # Plain ndarrays have no `.mask` attribute; report "no mask" for them
        # instead of raising AttributeError.
        return data.mask if np.ma.isMA(data) else np.ma.nomask

    step = nwindow - noverlap
    noc = Div.get_nocc(N, nwindow, noverlap)
    if weight_kernel is None:
        weight_kernel = 'boxcar'
    weights = get_window(weight_kernel, nwindow)

    # Accumulated outlier score per data index.
    S = defaultdict(int)
    for i, dat in enumerate(Div.div(data, nwindow, noverlap)):
        # Indices of outliers relative to this window.
        widx = method(dat.T, *args, **kwargs)
        if not len(widx):
            continue

        widx = np.asarray(widx)
        # Indices relative to the data.
        didx = i * step + widx
        # Drop indices that exceed the array dimensions.  The same selection
        # is applied to both index arrays so that window/data pairs stay
        # aligned even when `widx` is not sorted.
        keep = didx < N
        for ii, jj in zip(widx[keep], didx[keep]):
            # Mean probability that the point was flagged as an outlier.
            S[jj] += weights[ii] / noc[jj]

    # Force an integer dtype: sorting an empty list would otherwise yield a
    # float array, which cannot be used for indexing below.
    IDX = np.sort(np.array([idx for idx, p in S.items() if p > 0.5],
                           dtype=np.intp))

    if return_index:
        return IDX

    # The indices refer to the last axis (N = data.shape[-1]), hence the
    # ellipsis; for 1-D data this equals plain `[IDX]`.
    if return_mask:
        mask = np.zeros(data.shape, bool)
        mask[..., IDX] = True
        return mask

    if return_masked_data:
        if np.ma.isMA(data):
            # Assigning `masked` also works when the array has no mask yet
            # (mask is `nomask`); the underlying values are left untouched.
            data[..., IDX] = np.ma.masked
        else:
            mask = np.zeros_like(data, bool)
            mask[..., IDX] = True
            data = np.ma.masked_where(mask, data)
        return data