예제 #1
0
파일: outliers.py 프로젝트: apodemus/tsa
def running_sigma_clip(x, sig=3., nwindow=100, noverlap=0, iters=None, cenfunc=np.ma.median, varfunc=np.ma.var):
    """
    Sigma-clip `x` window by window and return the result as a masked array.

    Each section produced by ``Div.div(x, nwindow, 0)`` is clipped
    independently: NaNs are masked first, then every point whose squared
    deviation from ``cenfunc(section)`` exceeds ``sig**2`` times
    ``varfunc`` of the deviations is masked as well.

    Parameters
    ----------
    x : array-like
        Input data.
    sig : float
        Clipping threshold in units of the standard deviation.
    nwindow : int
        Window size handed to ``Div.div``.
    noverlap : int
        Not implemented; any truthy value prints a notice and is ignored.
    iters : int or None
        Number of clipping passes per section.  If None, passes are repeated
        until a pass masks no additional points.
    cenfunc, varfunc : callable
        Functions used to compute the centre and the variance of a section.

    Returns
    -------
    Masked array: the clipped sections concatenated and truncated to
    ``len(x)``.
    """
    #TODO:  Incorporate in WindowOutlierDetection
    #NOTE:  this implementation is slow

    if noverlap:
        print('Overlap not implemented yet!  Setting noverlap=0')
        noverlap = 0

    threshold = sig**2

    def clip_once(section):
        # One clipping pass: extend the mask (in place) with every point whose
        # squared deviation from the centre exceeds threshold * variance.
        deviation = section - cenfunc(section)
        section.mask |= np.ma.greater(deviation * deviation, varfunc(deviation) * threshold)

    clipped = []
    # presumably Div.div yields consecutive length-`nwindow` sections -- TODO confirm
    for window in Div.div(x, nwindow, 0):
        # masked_where also carries over any mask `window` already has
        section = np.ma.masked_where(np.isnan(window), window)

        if iters is None:
            # Always run at least one pass; stop once a pass rejects nothing.
            while True:
                n_before = section.count()
                clip_once(section)
                if section.count() == n_before:
                    break
        else:
            for _ in range(iters):
                clip_once(section)

        clipped.append(section)

    # Truncate to the input length (Div.div may have padded the last section
    # -- TODO confirm).
    return np.ma.concatenate(clipped)[:len(x)]
예제 #2
0
파일: outliers.py 프로젝트: apodemus/tsa
def WindowOutlierDetection(data, nwindow, noverlap, method, weight_kernel=None, 
                           return_index=True, return_mask=False, return_masked_data=False,
                           *args, **kwargs): #recur=None
    """
    Flag outliers in `data` by running `method` on (overlapping) windows.

    `method` is called on every window produced by
    ``Div.div(data, nwindow, noverlap)`` and must return the indices of the
    outliers relative to that window.  Each detection contributes the window
    weight at that position divided by ``noc`` for the data point; points
    whose accumulated score exceeds 0.5 are reported as outliers.

    Parameters
    ----------
    data : array
        Input data; windows are taken along the last axis.
    nwindow : int
        Window size.
    noverlap : int or other
        Overlap specification, resolved through ``Spectral.get_overlap``.
    method : callable
        ``method(window.T, *args, **kwargs)`` -> sequence of integer indices.
    weight_kernel : str or None
        Window name passed to ``get_window``; defaults to 'boxcar'.
    return_index, return_mask, return_masked_data : bool
        Select the output, checked in this order.  Because `return_index`
        defaults to True, it must be set to False to obtain the other forms.

    Returns
    -------
    Sorted integer index array, boolean mask with the shape of `data`, or
    masked data, depending on the flags (None if all flags are False).
    When `data` is already a masked array and `return_masked_data` is used,
    its mask is updated in place and the same object is returned.
    """
    noverlap = Spectral.get_overlap(nwindow, noverlap)

    N = data.shape[-1]
    if N < nwindow:
        warn( 'Data length smaller than window size! No clipping done!')
        # No outliers detected: fall through so that the requested output
        # type is still honoured (previously `data.mask` was returned, which
        # fails for plain ndarrays and ignores the return_* flags).
        IDX = np.array([], dtype=int)
    else:
        step = nwindow - noverlap
        # presumably the number of windows covering each data point -- TODO confirm
        noc = Div.get_nocc(N, nwindow, noverlap)

        if weight_kernel is None:
            weight_kernel = 'boxcar'
        weights = get_window( weight_kernel, nwindow )

        S = defaultdict(int)    # accumulated outlier score per data index
        for i, dat in enumerate( Div.div(data, nwindow, noverlap) ):
            widx = np.asarray( method(dat.T, *args, **kwargs) )  #indices of outliers relative to this window
            if len(widx):
                didx = i*step + widx               #indices relative to data
                # Drop indices that exceed the array dimensions, filtering the
                # window-relative and data-relative indices together so the
                # pairs stay aligned.
                inside = didx < N
                for ii, jj in zip(widx[inside], didx[inside]):
                    S[jj] += weights[ii] / noc[jj]  #weighted fraction of windows that flagged this point

        # Explicit integer dtype: an empty selection must still be usable as
        # an index array.
        IDX = np.sort( np.array([idx for idx, p in S.items() if p > 0.5], dtype=int) )

    if return_index:
        return IDX

    if return_mask:
        mask = np.zeros(data.shape, bool)
        mask[..., IDX] = True       # indices refer to the last axis
        return mask

    if return_masked_data:
        if np.ma.isMA(data):
            # Assigning np.ma.masked updates the mask in place and also works
            # when the array currently has no mask allocated (nomask).
            data[..., IDX] = np.ma.masked
        else:
            mask = np.zeros_like(data, bool)
            mask[..., IDX] = True
            data = np.ma.masked_where(mask, data)

        return data